From dd8680018ff6df4e940b84b0e2a21f05ef0c1756 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 9 Feb 2024 16:43:20 +0100 Subject: [PATCH 01/20] Analyzer: WIP on test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed --- src/Interpreters/getHeaderForProcessingStage.cpp | 2 +- src/Storages/StorageDistributed.cpp | 1 - src/Storages/StorageSnapshot.cpp | 8 ++++++++ src/Storages/StorageSnapshot.h | 3 +++ tests/analyzer_integration_broken_tests.txt | 1 - 5 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index d16e01ef2d2..1daf776b8bc 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -153,7 +153,7 @@ Block getHeaderForProcessingStage( if (context->getSettingsRef().allow_experimental_analyzer) { - auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->metadata->getColumns()); + auto storage = std::make_shared(storage_snapshot->storage.getStorageID(), storage_snapshot->getAllColumnsDescription()); InterpreterSelectQueryAnalyzer interpreter(query, context, storage, SelectQueryOptions(processed_stage).analyze()); result = interpreter.getSampleBlock(); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 5fb404da1cf..6922261a823 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -295,7 +295,6 @@ NamesAndTypesList StorageDistributed::getVirtuals() const /// NOTE This is weird. Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared(std::make_shared())), NameAndTypePair("_part", std::make_shared(std::make_shared())), NameAndTypePair("_part_index", std::make_shared()), NameAndTypePair("_part_uuid", std::make_shared()), diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 34c092c7208..8f5f4209efc 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -39,6 +39,14 @@ void StorageSnapshot::init() system_columns[BlockNumberColumn::name] = BlockNumberColumn::type; } +ColumnsDescription StorageSnapshot::getAllColumnsDescription() const +{ + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + auto column_names_and_types = getColumns(get_column_options); + + return ColumnsDescription{column_names_and_types}; +} + NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options) const { auto all_columns = getMetadataForQuery()->getColumns().get(options); diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index d62e118e1f2..a5724f04967 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -62,6 +62,9 @@ struct StorageSnapshot std::shared_ptr clone(DataPtr data_) const; + /// Get columns description + ColumnsDescription getAllColumnsDescription() const; + /// Get all available columns with types according to options. NamesAndTypesList getColumns(const GetColumnsOptions & options) const; diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index c04ed440c18..31f626a23f7 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -6,7 +6,6 @@ test_distributed_type_object/test.py::test_distributed_type_object test_executable_table_function/test.py::test_executable_function_input_python test_mask_sensitive_info/test.py::test_encryption_functions test_merge_table_over_distributed/test.py::test_global_in -test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task test_mysql_database_engine/test.py::test_mysql_ddl_for_mysql_database test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster From 093f629e4ed7b650efe6571bb7c7c36112b0d9f5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Feb 2024 14:12:44 +0000 Subject: [PATCH 02/20] Update reference file --- .../0_stateless/02890_describe_table_options.reference | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 5d99df36bb4..8842eca3311 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -65,7 +65,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 1 │ │ _part │ LowCardinality(String) │ │ │ │ │ │ 1 │ │ _part_index │ UInt64 │ │ │ │ │ │ 1 │ │ _part_uuid │ UUID │ │ │ │ │ │ 1 │ @@ -104,7 +103,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ DEFAULT │ ('foo', 0) │ │ ZSTD(1) │ │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ │ _part │ LowCardinality(String) │ │ │ │ │ │ 0 │ 1 │ │ _part_index │ UInt64 │ │ │ │ │ │ 0 │ 1 │ │ _part_uuid │ UUID │ │ │ │ │ │ 0 │ 1 │ @@ -183,7 +181,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ 0 │ -│ _table │ LowCardinality(String) │ 1 │ │ _part │ LowCardinality(String) │ 1 │ │ _part_index │ UInt64 │ 1 │ │ _part_uuid │ UUID │ 1 │ @@ -222,7 +219,6 @@ DESCRIBE remote(default, currentDatabase(), t_describe_options) FORMAT PrettyCom │ t │ Tuple( a String, b UInt64) │ 0 │ 0 │ -│ _table │ LowCardinality(String) │ 0 │ 1 │ │ _part │ LowCardinality(String) │ 0 │ 1 │ │ _part_index │ UInt64 │ 0 │ 1 │ │ _part_uuid │ UUID │ 0 │ 1 │ From 8d2ad5383bea6cab2e2107e991f17359cb36ac4f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Feb 2024 15:19:08 +0000 Subject: [PATCH 03/20] Fix execution name for constants --- src/Planner/PlannerActionsVisitor.cpp | 9 +++-- src/Storages/StorageMerge.cpp | 47 ++++++++++++++------------- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 511e9396a35..8fc200e7d38 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -88,7 +88,10 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + if (constant_node.hasSourceExpression()) + result = calculateActionNodeName(constant_node.getSourceExpression()); + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); break; } case QueryTreeNodeType::FUNCTION: @@ -527,7 +530,9 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = constant_node.hasSourceExpression() + ? action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()) + : calculateConstantActionNodeName(constant_literal, constant_type); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79d7b83cada..029ab4d4e4c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,34 +1001,37 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (!allow_experimental_analyzer) { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + if (has_database_virtual_column && !pipe_header.has("_database")) + { + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } - if (has_table_virtual_column && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); + if (has_table_virtual_column && !pipe_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } } /// Subordinary tables could have different but convertible types, like numeric types of different width. From 212245457662ebfbc7a6e865ec9518b028506bef Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 19 Feb 2024 12:41:29 +0000 Subject: [PATCH 04/20] Temporary progress --- src/Analyzer/ConstantNode.cpp | 96 ++++++++++++++------------- src/Analyzer/ConstantNode.h | 2 + src/Planner/Planner.cpp | 2 +- src/Planner/PlannerActionsVisitor.cpp | 46 +++++++++++-- src/Planner/PlannerContext.cpp | 10 ++- src/Planner/PlannerContext.h | 12 +++- src/Storages/StorageMerge.cpp | 55 ++++++++------- 7 files changed, 140 insertions(+), 83 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 69bed3dbe90..ce6da693f93 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -38,52 +38,9 @@ ConstantNode::ConstantNode(Field value_) : ConstantNode(value_, applyVisitor(FieldToDataType(), value_)) {} -void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const -{ - buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); - - if (hasAlias()) - buffer << ", alias: " << getAlias(); - - buffer << ", constant_value: " << constant_value->getValue().dump(); - buffer << ", constant_value_type: " << constant_value->getType()->getName(); - - if (getSourceExpression()) - { - buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; - getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); - } -} - -bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const -{ - const auto & rhs_typed = assert_cast(rhs); - return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; -} - -void ConstantNode::updateTreeHashImpl(HashState & hash_state) const -{ - auto type_name = constant_value->getType()->getName(); - hash_state.update(type_name.size()); - hash_state.update(type_name); - - hash_state.update(value_string.size()); - hash_state.update(value_string); -} - -QueryTreeNodePtr ConstantNode::cloneImpl() const -{ - return std::make_shared(constant_value, source_expression); -} - -ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +bool ConstantNode::requiresCastCall() const { const auto & constant_value_literal = constant_value->getValue(); - auto constant_value_ast = std::make_shared(constant_value_literal); - - if (!options.add_cast_for_constants) - return constant_value_ast; - bool need_to_add_cast_function = false; auto constant_value_literal_type = constant_value_literal.getType(); WhichDataType constant_value_type(constant_value->getType()); @@ -131,7 +88,56 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const // Add cast if constant was created as a result of constant folding. // Constant folding may lead to type transformation and literal on shard // may have a different type. - if (need_to_add_cast_function || source_expression != nullptr) + return need_to_add_cast_function || source_expression != nullptr; +} + +void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const +{ + buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); + + if (hasAlias()) + buffer << ", alias: " << getAlias(); + + buffer << ", constant_value: " << constant_value->getValue().dump(); + buffer << ", constant_value_type: " << constant_value->getType()->getName(); + + if (getSourceExpression()) + { + buffer << '\n' << std::string(indent + 2, ' ') << "EXPRESSION" << '\n'; + getSourceExpression()->dumpTreeImpl(buffer, format_state, indent + 4); + } +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +{ + const auto & rhs_typed = assert_cast(rhs); + return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; +} + +void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +{ + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + hash_state.update(type_name); + + hash_state.update(value_string.size()); + hash_state.update(value_string); +} + +QueryTreeNodePtr ConstantNode::cloneImpl() const +{ + return std::make_shared(constant_value, source_expression); +} + +ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const +{ + const auto & constant_value_literal = constant_value->getValue(); + auto constant_value_ast = std::make_shared(constant_value_literal); + + if (!options.add_cast_for_constants) + return constant_value_ast; + + if (requiresCastCall()) { auto constant_type_name_ast = std::make_shared(constant_value->getType()->getName()); return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast)); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 51c98a4a3b3..c0df092293d 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,6 +75,8 @@ public: return constant_value->getType(); } + bool requiresCastCall() const; + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index bcc42dbae7f..65033c6f66b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1153,7 +1153,7 @@ PlannerContextPtr buildPlannerContext(const QueryTreeNodePtr & query_tree_node, if (select_query_options.is_subquery) updateContextForSubqueryExecution(mutable_context); - return std::make_shared(mutable_context, std::move(global_planner_context)); + return std::make_shared(mutable_context, std::move(global_planner_context), select_query_options); } Planner::Planner(const QueryTreeNodePtr & query_tree_, diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 8fc200e7d38..b33e1a3509c 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,6 +44,22 @@ namespace ErrorCodes namespace { +String calculateActionNodeNameForConstant(const ConstantNode & constant_node) +{ + WriteBufferFromOwnString buffer; + if (constant_node.requiresCastCall()) + buffer << "_CAST("; + + buffer << calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + + if (constant_node.requiresCastCall()) + { + buffer << ", '" << constant_node.getResultType()->getName() << "'_String)"; + } + + return buffer.str(); +} + class ActionNodeNameHelper { public: @@ -88,10 +104,17 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); - if (constant_node.hasSourceExpression()) - result = calculateActionNodeName(constant_node.getSourceExpression()); + if (planner_context.isASTLevelOptimizationAllowed()) + { + result = calculateActionNodeNameForConstant(constant_node); + } else - result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + { + if (constant_node.hasSourceExpression()) + result = calculateActionNodeName(constant_node.getSourceExpression()); + else + result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); + } break; } case QueryTreeNodeType::FUNCTION: @@ -530,9 +553,20 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi const auto & constant_literal = constant_node.getValue(); const auto & constant_type = constant_node.getResultType(); - auto constant_node_name = constant_node.hasSourceExpression() - ? action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()) - : calculateConstantActionNodeName(constant_literal, constant_type); + auto constant_node_name = [&]() + { + if (planner_context->isASTLevelOptimizationAllowed()) + { + return calculateActionNodeNameForConstant(constant_node); + } + else + { + if (constant_node.hasSourceExpression()) + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + else + return calculateConstantActionNodeName(constant_literal, constant_type); + } + }(); ColumnWithTypeAndName column; column.name = constant_node_name; diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 422c8c1d01f..57db84d5031 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include "Interpreters/SelectQueryOptions.h" namespace DB { @@ -41,9 +43,10 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i return column_identifiers.contains(column_identifier); } -PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_) +PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) + , select_query_options(select_query_options_) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) @@ -116,6 +119,11 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } +bool PlannerContext::isASTLevelOptimizationAllowed() const +{ + return !(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options.ignore_ast_optimizations); +} + PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { const auto set_source_hash = set_source_node->getTreeHash(); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index d7ea4fd95dd..49272429b43 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -10,6 +10,7 @@ #include #include +#include namespace DB { @@ -48,7 +49,7 @@ class PlannerContext { public: /// Create planner context with query context and global planner context - PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_); + PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); /// Get planner context query context ContextPtr getQueryContext() const @@ -80,6 +81,11 @@ public: return global_planner_context; } + const SelectQueryOptions & getSelectQueryOptions() const + { + return select_query_options; + } + /// Get or create table expression data for table expression node. TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); @@ -135,6 +141,8 @@ public: static SetKey createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node); PreparedSets & getPreparedSets() { return prepared_sets; } + + bool isASTLevelOptimizationAllowed() const; private: /// Query context ContextMutablePtr query_context; @@ -142,6 +150,8 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; + SelectQueryOptions select_query_options; + /// Column node to column identifier std::unordered_map column_node_to_column_identifier; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 029ab4d4e4c..591a0ae375e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,37 +1001,34 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (!allow_experimental_analyzer) + if (has_database_virtual_column && !pipe_header.has("_database")) { - if (has_database_virtual_column && !pipe_header.has("_database")) - { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } - if (has_table_virtual_column && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); + if (has_table_virtual_column && !pipe_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(modified_context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); } /// Subordinary tables could have different but convertible types, like numeric types of different width. @@ -1393,7 +1390,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const RowPolicyDataOpt & row_policy_data_opt, ContextMutablePtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage) + QueryProcessingStage::Enum processed_stage [[maybe_unused]]) { Block before_block_header = builder.getHeader(); @@ -1452,9 +1449,9 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - if (local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) - convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; + // if (local_context->getSettingsRef().allow_experimental_analyzer + // && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + // convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) { From 601c3c33bc802b6c95ab2371379da4da90bd2baa Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Feb 2024 17:44:41 +0100 Subject: [PATCH 05/20] Set correct execution name for ConstantNode --- src/Analyzer/ConstantNode.cpp | 18 ++++++++++++++++++ src/Analyzer/ConstantNode.h | 2 ++ src/Planner/PlannerActionsVisitor.cpp | 14 ++++++++++++-- .../02227_union_match_by_name.reference | 2 +- 4 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index ce6da693f93..83446ca7d16 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -3,6 +3,7 @@ #include #include #include +#include "Analyzer/FunctionNode.h" #include #include @@ -91,6 +92,23 @@ bool ConstantNode::requiresCastCall() const return need_to_add_cast_function || source_expression != nullptr; } +bool ConstantNode::receivedFromInitiatorServer() const +{ + if (!hasSourceExpression()) + return false; + + auto * cast_function = getSourceExpression()->as(); + if (!cast_function || cast_function->getFunctionName() != "_CAST") + return false; + for (auto const & argument : cast_function->getArguments()) + { + auto * constant_arg = argument->as(); + if (!constant_arg || constant_arg->hasSourceExpression()) + return false; + } + return true; +} + void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const { buffer << std::string(indent, ' ') << "CONSTANT id: " << format_state.getNodeId(this); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index c0df092293d..18090c56630 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -77,6 +77,8 @@ public: bool requiresCastCall() const; + bool receivedFromInitiatorServer() const; + void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index b33e1a3509c..89d843a28ac 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -111,7 +111,12 @@ public: else { if (constant_node.hasSourceExpression()) - result = calculateActionNodeName(constant_node.getSourceExpression()); + { + if (constant_node.receivedFromInitiatorServer()) + result = calculateActionNodeNameForConstant(constant_node); + else + result = calculateActionNodeName(constant_node.getSourceExpression()); + } else result = calculateConstantActionNodeName(constant_node.getValue(), constant_node.getResultType()); } @@ -562,7 +567,12 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi else { if (constant_node.hasSourceExpression()) - return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + { + if (constant_node.receivedFromInitiatorServer()) + return calculateActionNodeNameForConstant(constant_node); + else + return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); + } else return calculateConstantActionNodeName(constant_literal, constant_type); } diff --git a/tests/queries/0_stateless/02227_union_match_by_name.reference b/tests/queries/0_stateless/02227_union_match_by_name.reference index c28035fab49..d726ae86de7 100644 --- a/tests/queries/0_stateless/02227_union_match_by_name.reference +++ b/tests/queries/0_stateless/02227_union_match_by_name.reference @@ -36,7 +36,7 @@ Header: avgWeighted(x, y) Nullable(Float64) Header: x Nullable(Nothing) y UInt8 Expression (Projection) - Header: NULL_Nullable(Nothing) Nullable(Nothing) + Header: _CAST(NULL_Nullable(Nothing), \'Nullable(Nothing)\'_String) Nullable(Nothing) 1_UInt8 UInt8 Expression (Change column names to column identifiers) Header: __table5.dummy UInt8 From 81df9c5a273e2f6533147889c929c4a519dc08af Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Feb 2024 19:07:54 +0100 Subject: [PATCH 06/20] Use possition to merge plans --- src/Storages/StorageMerge.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 591a0ae375e..79d7b83cada 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1390,7 +1390,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const RowPolicyDataOpt & row_policy_data_opt, ContextMutablePtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage [[maybe_unused]]) + QueryProcessingStage::Enum processed_stage) { Block before_block_header = builder.getHeader(); @@ -1449,9 +1449,9 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - // if (local_context->getSettingsRef().allow_experimental_analyzer - // && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) - // convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; + if (local_context->getSettingsRef().allow_experimental_analyzer + && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) { From d13b2a91c1bfbb7f59b5380ceb1a5057e88043da Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 23 Feb 2024 13:33:12 +0000 Subject: [PATCH 07/20] Fix virtual tables --- src/Interpreters/ActionsDAG.cpp | 2 +- src/Storages/StorageMerge.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 7240679abb7..7412eea5e32 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1318,7 +1318,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( size_t num_result_columns = result.size(); if (mode == MatchColumnsMode::Position && num_input_columns != num_result_columns) - throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match"); + throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match (source: {} and result: {})", num_input_columns, num_result_columns); if (add_casted_columns && mode != MatchColumnsMode::Name) throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79d7b83cada..5b18fb8f838 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1001,7 +1001,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && !pipe_header.has("_database")) + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) { ColumnWithTypeAndName column; column.name = "_database"; @@ -1016,7 +1016,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && !pipe_header.has("_table")) + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) { ColumnWithTypeAndName column; column.name = "_table"; From a2cabc9fe13c0227db5f2bd2eaf090aa9b0189a5 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 27 Feb 2024 16:16:15 +0100 Subject: [PATCH 08/20] Fixup --- src/Analyzer/ConstantNode.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 83446ca7d16..e3b87edbdc6 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -100,12 +100,6 @@ bool ConstantNode::receivedFromInitiatorServer() const auto * cast_function = getSourceExpression()->as(); if (!cast_function || cast_function->getFunctionName() != "_CAST") return false; - for (auto const & argument : cast_function->getArguments()) - { - auto * constant_arg = argument->as(); - if (!constant_arg || constant_arg->hasSourceExpression()) - return false; - } return true; } From 68181ab00f356a66705ed49cc862da95bea2db5c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 1 Mar 2024 13:14:44 +0000 Subject: [PATCH 09/20] Revert "Revert "Add `toMillisecond` function"" This reverts commit 5225fc6a14bd6c1de1b70be1efaa5e052e0a338c. --- .../functions/date-time-functions.md | 60 ++++++++++++---- src/Common/DateLUTImpl.h | 40 ++++++++--- src/Functions/DateTimeTransforms.cpp | 15 ++-- src/Functions/DateTimeTransforms.h | 71 +++++++++++++------ src/Functions/toMillisecond.cpp | 18 +++++ .../02998_to_milliseconds.reference | 8 +++ .../0_stateless/02998_to_milliseconds.sql | 17 +++++ .../aspell-ignore/en/aspell-dict.txt | 1 + 8 files changed, 179 insertions(+), 51 deletions(-) create mode 100644 src/Functions/toMillisecond.cpp create mode 100644 tests/queries/0_stateless/02998_to_milliseconds.reference create mode 100644 tests/queries/0_stateless/02998_to_milliseconds.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c5b3b4cc3ae..83a3bd77cdb 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -394,8 +394,7 @@ Result: ## toYear -Converts a date or date with time to the year number (AD) as `UInt16` value. - +Returns the year component (AD) of a date or date with time. **Syntax** @@ -431,7 +430,7 @@ Result: ## toQuarter -Converts a date or date with time to the quarter number (1-4) as `UInt8` value. +Returns the quarter (1-4) of a date or date with time. **Syntax** @@ -465,10 +464,9 @@ Result: └──────────────────────────────────────────────┘ ``` - ## toMonth -Converts a date or date with time to the month number (1-12) as `UInt8` value. +Returns the month component (1-12) of a date or date with time. **Syntax** @@ -504,7 +502,7 @@ Result: ## toDayOfYear -Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value. +Returns the number of the day within the year (1-366) of a date or date with time. **Syntax** @@ -540,7 +538,7 @@ Result: ## toDayOfMonth -Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value. +Returns the number of the day within the month (1-31) of a date or date with time. **Syntax** @@ -576,7 +574,7 @@ Result: ## toDayOfWeek -Converts a date or date with time to the number of the day in the week as `UInt8` value. +Returns the number of the day within the week of a date or date with time. The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument. @@ -627,7 +625,7 @@ Result: ## toHour -Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value. +Returns the hour component (0-24) of a date with time. Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone). @@ -641,7 +639,7 @@ Alias: `HOUR` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -665,7 +663,7 @@ Result: ## toMinute -Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value. +Returns the minute component (0-59) a date with time. **Syntax** @@ -677,7 +675,7 @@ Alias: `MINUTE` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -701,7 +699,7 @@ Result: ## toSecond -Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered. +Returns the second component (0-59) of a date with time. Leap seconds are not considered. **Syntax** @@ -713,7 +711,7 @@ Alias: `SECOND` **Arguments** -- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) +- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) **Returned value** @@ -735,6 +733,40 @@ Result: └─────────────────────────────────────────────┘ ``` +## toMillisecond + +Returns the millisecond component (0-999) of a date with time. + +**Syntax** + +```sql +toMillisecond(value) +``` + +*Arguments** + +- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) + +Alias: `MILLISECOND` + +```sql +SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3)) +``` + +Result: + +```response +┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┐ +│ 456 │ +└────────────────────────────────────────────────────────────┘ +``` + +**Returned value** + +- The millisecond in the minute (0 - 59) of the given date/time + +Type: `UInt16` + ## toUnixTimestamp Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation. diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 0e72b489ace..7bf66c0504a 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -3,13 +3,13 @@ #include #include #include +#include #include #include #include #include - #define DATE_SECONDS_PER_DAY 86400 /// Number of seconds in a day, 60 * 60 * 24 #define DATE_LUT_MIN_YEAR 1900 /// 1900 since majority of financial organizations consider 1900 as an initial year. @@ -280,9 +280,9 @@ private: static_assert(std::is_integral_v && std::is_integral_v); assert(divisor > 0); - if (likely(offset_is_whole_number_of_hours_during_epoch)) + if (offset_is_whole_number_of_hours_during_epoch) [[likely]] { - if (likely(x >= 0)) + if (x >= 0) [[likely]] return static_cast(x / divisor * divisor); /// Integer division for negative numbers rounds them towards zero (up). @@ -576,10 +576,10 @@ public: unsigned toSecond(Time t) const { - if (likely(offset_is_whole_number_of_minutes_during_epoch)) + if (offset_is_whole_number_of_minutes_during_epoch) [[likely]] { Time res = t % 60; - if (likely(res >= 0)) + if (res >= 0) [[likely]] return static_cast(res); return static_cast(res) + 60; } @@ -593,6 +593,30 @@ public: return time % 60; } + template + unsigned toMillisecond(const DateOrTime & datetime, Int64 scale_multiplier) const + { + constexpr Int64 millisecond_multiplier = 1'000; + constexpr Int64 microsecond_multiplier = 1'000 * millisecond_multiplier; + constexpr Int64 divider = microsecond_multiplier / millisecond_multiplier; + + auto components = DB::DecimalUtils::splitWithScaleMultiplier(datetime, scale_multiplier); + + if (datetime.value < 0 && components.fractional) + { + components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional; + --components.whole; + } + Int64 fractional = components.fractional; + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); + + UInt16 millisecond = static_cast(fractional / divider); + return millisecond; + } + unsigned toMinute(Time t) const { if (t >= 0 && offset_is_whole_number_of_hours_during_epoch) @@ -1122,9 +1146,9 @@ public: DateOrTime toStartOfMinuteInterval(DateOrTime t, UInt64 minutes) const { Int64 divisor = 60 * minutes; - if (likely(offset_is_whole_number_of_minutes_during_epoch)) + if (offset_is_whole_number_of_minutes_during_epoch) [[likely]] { - if (likely(t >= 0)) + if (t >= 0) [[likely]] return static_cast(t / divisor * divisor); return static_cast((t + 1 - divisor) / divisor * divisor); } @@ -1339,7 +1363,7 @@ public: UInt8 saturateDayOfMonth(Int16 year, UInt8 month, UInt8 day_of_month) const { - if (likely(day_of_month <= 28)) + if (day_of_month <= 28) [[likely]] return day_of_month; UInt8 days_in_month = daysInMonth(year, month); diff --git a/src/Functions/DateTimeTransforms.cpp b/src/Functions/DateTimeTransforms.cpp index 7ec13be9d6d..006d1e94ccd 100644 --- a/src/Functions/DateTimeTransforms.cpp +++ b/src/Functions/DateTimeTransforms.cpp @@ -10,16 +10,17 @@ namespace ErrorCodes void throwDateIsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", name); -} - -void throwDateTimeIsNotSupported(const char * name) -{ - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type DateTime of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date for function {}", name); } void throwDate32IsNotSupported(const char * name) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date32 of argument for function {}", name); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type Date32 for function {}", name); } + +void throwDateTimeIsNotSupported(const char * name) +{ + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument of type DateTime for function {}", name); +} + } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index cad40d0acb8..6a5603339fc 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -6,6 +6,7 @@ #include #include #include +#include "base/Decimal.h" #include #include #include @@ -54,8 +55,8 @@ constexpr time_t MAX_DATE_TIMESTAMP = 5662310399; // 2149-06-06 23:59:59 U constexpr time_t MAX_DATETIME_DAY_NUM = 49710; // 2106-02-07 [[noreturn]] void throwDateIsNotSupported(const char * name); -[[noreturn]] void throwDateTimeIsNotSupported(const char * name); [[noreturn]] void throwDate32IsNotSupported(const char * name); +[[noreturn]] void throwDateTimeIsNotSupported(const char * name); /// This factor transformation will say that the function is monotone everywhere. struct ZeroTransform @@ -481,7 +482,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -516,7 +517,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -559,7 +560,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { @@ -602,7 +603,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { @@ -623,7 +624,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { @@ -644,7 +645,7 @@ struct ToStartOfInterval } static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { - throwDateIsNotSupported(TO_START_OF_INTERVAL_NAME); + throwDate32IsNotSupported(TO_START_OF_INTERVAL_NAME); } static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) { @@ -777,7 +778,7 @@ struct ToTimeImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -802,7 +803,7 @@ struct ToStartOfMinuteImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -849,7 +850,7 @@ struct ToStartOfSecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -897,7 +898,7 @@ struct ToStartOfMillisecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -941,7 +942,7 @@ struct ToStartOfMicrosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -979,7 +980,7 @@ struct ToStartOfNanosecondImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1004,7 +1005,7 @@ struct ToStartOfFiveMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1036,7 +1037,7 @@ struct ToStartOfTenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1068,7 +1069,7 @@ struct ToStartOfFifteenMinutesImpl } static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) { @@ -1103,7 +1104,7 @@ struct TimeSlotImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1142,7 +1143,7 @@ struct ToStartOfHourImpl static UInt32 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt32 execute(UInt16, const DateLUTImpl &) @@ -1429,7 +1430,7 @@ struct ToHourImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1456,7 +1457,7 @@ struct TimezoneOffsetImpl static time_t execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static time_t execute(UInt16, const DateLUTImpl &) @@ -1482,7 +1483,7 @@ struct ToMinuteImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1507,7 +1508,7 @@ struct ToSecondImpl } static UInt8 execute(Int32, const DateLUTImpl &) { - throwDateIsNotSupported(name); + throwDate32IsNotSupported(name); } static UInt8 execute(UInt16, const DateLUTImpl &) { @@ -1518,6 +1519,32 @@ struct ToSecondImpl using FactorTransform = ToStartOfMinuteImpl; }; +struct ToMillisecondImpl +{ + static constexpr auto name = "toMillisecond"; + + static UInt16 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl & time_zone) + { + return time_zone.toMillisecond(datetime64, scale_multiplier); + } + + static UInt16 execute(UInt32, const DateLUTImpl &) + { + return 0; + } + static UInt16 execute(Int32, const DateLUTImpl &) + { + throwDate32IsNotSupported(name); + } + static UInt16 execute(UInt16, const DateLUTImpl &) + { + throwDateIsNotSupported(name); + } + static constexpr bool hasPreimage() { return false; } + + using FactorTransform = ZeroTransform; +}; + struct ToISOYearImpl { static constexpr auto name = "toISOYear"; diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp new file mode 100644 index 00000000000..e15b56cc555 --- /dev/null +++ b/src/Functions/toMillisecond.cpp @@ -0,0 +1,18 @@ +#include +#include +#include + +namespace DB +{ + +using FunctionToMillisecond = FunctionDateOrDateTimeToSomething; + +REGISTER_FUNCTION(ToMillisecond) +{ + factory.registerFunction(); + + /// MySQL compatibility alias. + factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02998_to_milliseconds.reference b/tests/queries/0_stateless/02998_to_milliseconds.reference new file mode 100644 index 00000000000..05139c19d1d --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.reference @@ -0,0 +1,8 @@ +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30 0 0 +2023-04-21 10:20:30.123 123 123 +2023-04-21 10:20:30.123456 123 123 +2023-04-21 10:20:30.123456789 123 123 +120 +2023-04-21 10:20:30 0 +2023-04-21 10:20:30 0 diff --git a/tests/queries/0_stateless/02998_to_milliseconds.sql b/tests/queries/0_stateless/02998_to_milliseconds.sql new file mode 100644 index 00000000000..f159f6aab50 --- /dev/null +++ b/tests/queries/0_stateless/02998_to_milliseconds.sql @@ -0,0 +1,17 @@ +-- Negative tests +SELECT toMillisecond(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT toMillisecond('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toMillisecond(toDate32('2024-02-28')); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- Tests with constant and non-constant arguments +SELECT toDateTime('2023-04-21 10:20:30') AS dt, toMillisecond(dt), toMillisecond(materialize(dt)); +SELECT toDateTime64('2023-04-21 10:20:30', 0) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123', 3) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456', 6) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); +SELECT toDateTime64('2023-04-21 10:20:30.123456789', 9) AS dt64, toMillisecond(dt64), toMillisecond(materialize(dt64)); + +-- Special cases +SELECT MILLISECOND(toDateTime64('2023-04-21 10:20:30.123456', 2)); -- Alias +SELECT toNullable(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- Nullable +SELECT toLowCardinality(toDateTime('2023-04-21 10:20:30')) AS dt, toMillisecond(dt); -- LowCardinality diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6db3ac23b05..b09b41c5d70 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2542,6 +2542,7 @@ toRelativeSecondNum toRelativeWeekNum toRelativeYearNum toSecond +toMillisecond toStartOfDay toStartOfFifteenMinutes toStartOfFiveMinutes From 9c5ed092f8bbb4e5d5dad5b3819879ed6fa04be1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 4 Mar 2024 14:15:07 +0100 Subject: [PATCH 10/20] Fix CREATE VIEW with scalar subquery #ci_set_analyzer --- src/Planner/PlannerActionsVisitor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 89d843a28ac..e5610dd6fe7 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -110,7 +110,7 @@ public: } else { - if (constant_node.hasSourceExpression()) + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) result = calculateActionNodeNameForConstant(constant_node); @@ -566,7 +566,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi } else { - if (constant_node.hasSourceExpression()) + if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) return calculateActionNodeNameForConstant(constant_node); From 7dfe8afa76806449aed35faaff1b0e7dbe87849b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 5 Mar 2024 11:11:18 +0000 Subject: [PATCH 11/20] Add in-source docs --- src/Functions/toMillisecond.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Functions/toMillisecond.cpp b/src/Functions/toMillisecond.cpp index e15b56cc555..aaef517c996 100644 --- a/src/Functions/toMillisecond.cpp +++ b/src/Functions/toMillisecond.cpp @@ -1,6 +1,7 @@ -#include +#include #include #include +#include namespace DB { @@ -9,7 +10,21 @@ using FunctionToMillisecond = FunctionDateOrDateTimeToSomething(); + factory.registerFunction( + + + FunctionDocumentation{ + .description=R"( +Returns the millisecond component (0-999) of a date with time. + )", + .syntax="toMillisecond(value)", + .arguments={{"value", "DateTime or DateTime64"}}, + .returned_value="The millisecond in the minute (0 - 59) of the given date/time", + .examples{ + {"toMillisecond", "SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3)", "456"}}, + .categories{"Dates and Times"} + } + ); /// MySQL compatibility alias. factory.registerAlias("MILLISECOND", "toMillisecond", FunctionFactory::CaseInsensitive); From 5524f5901f4c22e7e97e1120410e7f47577e07e1 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 16:18:03 +0100 Subject: [PATCH 12/20] Cleanup code --- src/Analyzer/ConstantNode.cpp | 3 +- src/Analyzer/ConstantNode.h | 2 + src/Planner/PlannerActionsVisitor.cpp | 69 +++++++++++++++++++++++++-- src/Planner/PlannerContext.cpp | 8 +--- src/Planner/PlannerContext.h | 13 +++-- 5 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index e3b87edbdc6..b6940271b1e 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -1,9 +1,10 @@ #include +#include + #include #include #include -#include "Analyzer/FunctionNode.h" #include #include diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 18090c56630..45f85cec4a3 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,8 +75,10 @@ public: return constant_value->getType(); } + /// Check if convertation to AST requires wrapping with _CAST function. bool requiresCastCall() const; + /// Check if constant is a result of _CAST function constant folding. bool receivedFromInitiatorServer() const; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index e5610dd6fe7..52971539dd7 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -44,7 +44,12 @@ namespace ErrorCodes namespace { -String calculateActionNodeNameForConstant(const ConstantNode & constant_node) +/* Calculates Action node name for ConstantNode. + * + * If converting to AST will add a '_CAST' function call, + * the result action name will also include it. + */ +String calculateActionNodeNameWithCastIfNeeded(const ConstantNode & constant_node) { WriteBufferFromOwnString buffer; if (constant_node.requiresCastCall()) @@ -104,16 +109,43 @@ public: case QueryTreeNodeType::CONSTANT: { const auto & constant_node = node->as(); + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ if (planner_context.isASTLevelOptimizationAllowed()) { - result = calculateActionNodeNameForConstant(constant_node); + result = calculateActionNodeNameWithCastIfNeeded(constant_node); } else { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) - result = calculateActionNodeNameForConstant(constant_node); + result = calculateActionNodeNameWithCastIfNeeded(constant_node); else result = calculateActionNodeName(constant_node.getSourceExpression()); } @@ -560,16 +592,43 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto constant_node_name = [&]() { + /* To ensure that headers match during distributed query we need to simulate action node naming on + * secondary servers. If we don't do that headers will mismatch due to constant folding. + * + * +--------+ + * -----------------| Server |---------------- + * / +--------+ \ + * / \ + * v v + * +-----------+ +-----------+ + * | Initiator | ------ | Secondary |------ + * +-----------+ / +-----------+ \ + * | / \ + * | / \ + * v / \ + * +---------------+ v v + * | Wrap in _CAST | +----------------------------+ +----------------------+ + * | if needed | | Constant folded from _CAST | | Constant folded from | + * +---------------+ +----------------------------+ | another expression | + * | +----------------------+ + * v | + * +----------------------------+ v + * | Name ConstantNode the same | +--------------------------+ + * | as on initiator server | | Generate action name for | + * | (wrap in _CAST if needed) | | original expression | + * +----------------------------+ +--------------------------+ + */ if (planner_context->isASTLevelOptimizationAllowed()) { - return calculateActionNodeNameForConstant(constant_node); + return calculateActionNodeNameWithCastIfNeeded(constant_node); } else { + // Need to check if constant folded from QueryNode until https://github.com/ClickHouse/ClickHouse/issues/60847 is fixed. if (constant_node.hasSourceExpression() && constant_node.getSourceExpression()->getNodeType() != QueryTreeNodeType::QUERY) { if (constant_node.receivedFromInitiatorServer()) - return calculateActionNodeNameForConstant(constant_node); + return calculateActionNodeNameWithCastIfNeeded(constant_node); else return action_node_name_helper.calculateActionNodeName(constant_node.getSourceExpression()); } diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index 57db84d5031..f33255f0a44 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -4,7 +4,6 @@ #include #include #include -#include "Interpreters/SelectQueryOptions.h" namespace DB { @@ -46,7 +45,7 @@ bool GlobalPlannerContext::hasColumnIdentifier(const ColumnIdentifier & column_i PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_) : query_context(std::move(query_context_)) , global_planner_context(std::move(global_planner_context_)) - , select_query_options(select_query_options_) + , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) @@ -119,11 +118,6 @@ const ColumnIdentifier * PlannerContext::getColumnNodeIdentifierOrNull(const Que return table_expression_data->getColumnIdentifierOrNull(column_name); } -bool PlannerContext::isASTLevelOptimizationAllowed() const -{ - return !(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options.ignore_ast_optimizations); -} - PlannerContext::SetKey PlannerContext::createSetKey(const DataTypePtr & left_operand_type, const QueryTreeNodePtr & set_source_node) { const auto set_source_hash = set_source_node->getTreeHash(); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index e7fa7abf2b6..4d9ba037cac 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -111,11 +111,6 @@ public: return global_planner_context; } - const SelectQueryOptions & getSelectQueryOptions() const - { - return select_query_options; - } - /// Get or create table expression data for table expression node. TableExpressionData & getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node); @@ -172,7 +167,11 @@ public: PreparedSets & getPreparedSets() { return prepared_sets; } - bool isASTLevelOptimizationAllowed() const; + /// Returns false if any of following conditions met: + /// 1. Query is executed on a follower node. + /// 2. ignore_ast_optimizations is set. + bool isASTLevelOptimizationAllowed() const { return is_ast_level_optimization_allowed; } + private: /// Query context ContextMutablePtr query_context; @@ -180,7 +179,7 @@ private: /// Global planner context GlobalPlannerContextPtr global_planner_context; - SelectQueryOptions select_query_options; + bool is_ast_level_optimization_allowed; /// Column node to column identifier std::unordered_map column_node_to_column_identifier; From 55c028322dfb4111baf8d0ad8eac4d48975dd349 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 5 Mar 2024 16:59:22 +0100 Subject: [PATCH 13/20] Fix typo --- src/Analyzer/ConstantNode.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 45f85cec4a3..24f0c786980 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -75,7 +75,7 @@ public: return constant_value->getType(); } - /// Check if convertation to AST requires wrapping with _CAST function. + /// Check if conversion to AST requires wrapping with _CAST function. bool requiresCastCall() const; /// Check if constant is a result of _CAST function constant folding. From d0e887f8344a8ba70fff50a5d4f23e1a724c3c1b Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 6 Mar 2024 15:02:18 +0000 Subject: [PATCH 14/20] Update index.md --- .../sql-reference/window-functions/index.md | 108 ++++++++++-------- 1 file changed, 59 insertions(+), 49 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 6340c369bff..5bfe22d23a2 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -5,7 +5,12 @@ sidebar_label: Window Functions title: Window Functions --- -ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported: +Windows functions let you perform calculations across a set of rows that are related to the current row. +Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. + +## Standard Window Functionos + +ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. | Feature | Support or workaround | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -25,6 +30,8 @@ ClickHouse supports the standard grammar for defining windows and window functio ## ClickHouse-specific Window Functions +There are also the following window function that's specific to ClickHouse: + ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) Finds non-negative derivative for given `metric_column` by `timestamp_column`. @@ -33,40 +40,6 @@ The computed value is the following for each row: - `0` for 1st row, - ${metric_i - metric_{i-1} \over timestamp_i - timestamp_{i-1}} * interval$ for $i_th$ row. -## References - -### GitHub Issues - -The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). - -All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. - -### Tests - -These tests contain the examples of the currently supported grammar: - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml - -https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql - -### Postgres Docs - -https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW - -https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS - -https://www.postgresql.org/docs/devel/functions-window.html - -https://www.postgresql.org/docs/devel/tutorial-window.html - -### MySQL Docs - -https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html - -https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html - ## Syntax ```text @@ -80,20 +53,7 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column] - `PARTITION BY` - defines how to break a resultset into groups. - `ORDER BY` - defines how to order rows inside the group during calculation aggregate_function. - `ROWS or RANGE` - defines bounds of a frame, aggregate_function is calculated within a frame. -- `WINDOW` - allows to reuse a window definition with multiple expressions. - -### Functions - -These functions can be used only as a window function. - -- `row_number()` - Number the current row within its partition starting from 1. -- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. -- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. -- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. -- `rank()` - Rank the current row within its partition with gaps. -- `dense_rank()` - Rank the current row within its partition without gaps. -- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. -- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. +- `WINDOW` - allows multiple expressions to use the same window definition. ```text PARTITION @@ -112,8 +72,23 @@ These functions can be used only as a window function. └─────────────────┘ <--- UNBOUNDED FOLLOWING (END of the PARTITION) ``` +### Functions + +These functions can be used only as a window function. + +- `row_number()` - Number the current row within its partition starting from 1. +- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame. +- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame. +- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame. +- `rank()` - Rank the current row within its partition with gaps. +- `dense_rank()` - Rank the current row within its partition without gaps. +- `lagInFrame(x)` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. +- `leadInFrame(x)` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. + ## Examples +Let's have a look at some examples of how window functions can be used. + ```sql CREATE TABLE wf_partition ( @@ -589,6 +564,41 @@ ORDER BY └──────────────┴─────────────────────┴───────┴─────────────────────────┘ ``` +## References + +### GitHub Issues + +The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). + +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. + +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html + + ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) From 43d5338f9233f19f774293f59cafb0ff2093030e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Mar 2024 17:10:26 +0100 Subject: [PATCH 15/20] restart ci From 0dea920a06d32acdda751cab20ef475163556769 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:26:16 +0100 Subject: [PATCH 16/20] reload checks From b1b999f1c89d213164ba20b01c1a37c16e93d068 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:44:57 +0100 Subject: [PATCH 17/20] fix typo --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 5bfe22d23a2..9b2ded7b6ce 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -8,7 +8,7 @@ title: Window Functions Windows functions let you perform calculations across a set of rows that are related to the current row. Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned. -## Standard Window Functionos +## Standard Window Functions ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. From 5b28614f4ccda9a4620d4121479b3328d5aea04f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 7 Mar 2024 14:50:37 +0100 Subject: [PATCH 18/20] Fix TableFunctionExecutable::skipAnalysisForArguments --- src/TableFunctions/TableFunctionExecutable.cpp | 3 +++ .../03006_analyzer_executable_table_function.reference | 0 .../0_stateless/03006_analyzer_executable_table_function.sql | 4 ++++ 3 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/03006_analyzer_executable_table_function.reference create mode 100644 tests/queries/0_stateless/03006_analyzer_executable_table_function.sql diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 9fa5ddf15c2..2c3802e8667 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -71,6 +71,9 @@ std::vector TableFunctionExecutable::skipAnalysisForArguments(const Quer const auto & table_function_node_arguments = table_function_node.getArguments().getNodes(); size_t table_function_node_arguments_size = table_function_node_arguments.size(); + if (table_function_node_arguments_size <= 2) + return {}; + std::vector result_indexes; result_indexes.reserve(table_function_node_arguments_size - 2); for (size_t i = 2; i < table_function_node_arguments_size; ++i) diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference b/tests/queries/0_stateless/03006_analyzer_executable_table_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql new file mode 100644 index 00000000000..4edbcc97f50 --- /dev/null +++ b/tests/queries/0_stateless/03006_analyzer_executable_table_function.sql @@ -0,0 +1,4 @@ +SELECT + toFixedString(toFixedString(toLowCardinality(toFixedString('--------------------', toNullable(20))), toLowCardinality(20)), 20), + * +FROM executable('data String', SETTINGS max_command_execution_time = 100); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} From b9d6f4b3ed999e217d0be22f1580b4d19de4e3d9 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 7 Mar 2024 17:42:50 +0000 Subject: [PATCH 19/20] fix deadlock in async inserts via native protocol --- src/Server/TCPHandler.cpp | 4 +++- .../03006_async_insert_deadlock_log.reference | 1 + .../0_stateless/03006_async_insert_deadlock_log.sh | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03006_async_insert_deadlock_log.reference create mode 100755 tests/queries/0_stateless/03006_async_insert_deadlock_log.sh diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d883029408c..a3cdec6a4f3 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -936,6 +936,8 @@ void TCPHandler::processInsertQuery() auto result = processAsyncInsertQuery(*insert_queue); if (result.status == AsynchronousInsertQueue::PushResult::OK) { + /// Reset pipeline because it may hold write lock for some storages. + state.io.pipeline.reset(); if (settings.wait_for_async_insert) { size_t timeout_ms = settings.wait_for_async_insert_timeout.totalMilliseconds(); @@ -968,7 +970,7 @@ void TCPHandler::processInsertQuery() else { PushingPipelineExecutor executor(state.io.pipeline); - run_executor(executor, processed_block); + run_executor(executor, std::move(processed_block)); } sendInsertProfileEvents(); diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh new file mode 100755 index 00000000000..f01c34cdbda --- /dev/null +++ b/tests/queries/0_stateless/03006_async_insert_deadlock_log.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --query "CREATE TABLE t_async_insert_deadlock (a UInt64) ENGINE = Log" + +echo '{"a": 1}' | $CLICKHOUSE_CLIENT --async_insert 1 --wait_for_async_insert 1 --query "INSERT INTO t_async_insert_deadlock FORMAT JSONEachRow" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM t_async_insert_deadlock ORDER BY a" +$CLICKHOUSE_CLIENT --query "DROP TABLE t_async_insert_deadlock" From efb72ca8f1a9d20b2053ce280a22e10039d57ed1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 8 Mar 2024 08:41:08 +0100 Subject: [PATCH 20/20] Add tests for fixed issues --- .../0_stateless/03006_buffer_overflow_join.reference | 0 tests/queries/0_stateless/03006_buffer_overflow_join.sql | 6 ++++++ .../03007_column_nullable_uninitialzed_value.reference | 3 +++ .../03007_column_nullable_uninitialzed_value.sql | 1 + 4 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03006_buffer_overflow_join.reference create mode 100644 tests/queries/0_stateless/03006_buffer_overflow_join.sql create mode 100644 tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference create mode 100644 tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.reference b/tests/queries/0_stateless/03006_buffer_overflow_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_buffer_overflow_join.sql b/tests/queries/0_stateless/03006_buffer_overflow_join.sql new file mode 100644 index 00000000000..8c1fa3cecc0 --- /dev/null +++ b/tests/queries/0_stateless/03006_buffer_overflow_join.sql @@ -0,0 +1,6 @@ +CREATE TABLE 03006_buffer_overflow_l (`a` String, `b` Tuple(String, String)) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_l SELECT * FROM generateRandom() limit 1000; +CREATE TABLE 03006_buffer_overflow_r (`a` LowCardinality(Nullable(String)), `c` Tuple(LowCardinality(String), LowCardinality(String))) ENGINE = Memory; +INSERT INTO 03006_buffer_overflow_r SELECT * FROM generateRandom() limit 1000; + +SELECT a FROM 03006_buffer_overflow_l RIGHT JOIN 03006_buffer_overflow_r USING (a) ORDER BY a ASC NULLS FIRST FORMAT Null; diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference new file mode 100644 index 00000000000..bead7ee1474 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.reference @@ -0,0 +1,3 @@ +\N 1000 + +\N 1000 diff --git a/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql new file mode 100644 index 00000000000..9479044e0e0 --- /dev/null +++ b/tests/queries/0_stateless/03007_column_nullable_uninitialzed_value.sql @@ -0,0 +1 @@ +SELECT count(NULL) IGNORE NULLS > avg(toDecimal32(NULL)) IGNORE NULLS, count() FROM numbers(1000) WITH TOTALS SETTINGS allow_experimental_analyzer = 1;