Merge pull request #52151 from amosbird/fix_52075

Fix incorrect projection analysis when aggregation expression contains monotonic functions
This commit is contained in:
Nikolai Kochetov 2023-07-21 18:30:27 +02:00 committed by GitHub
commit 01cb502af3
12 changed files with 61 additions and 8 deletions
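The gist of the fix: when the query's aggregation DAG is matched against a projection's DAG, a node that only corresponds through a monotonic function wrapper (for instance, an expression wrapped in an extra negation) was previously treated like an exact match, which led to incorrect projection analysis. A minimal repro, taken verbatim from the test added later in this commit (the projection groups by -c0, the query by gcd(-sign(c0), -c0)):

CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY ();
INSERT INTO t0(c0) VALUES (1);
SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0) SETTINGS optimize_use_implicit_projections = 1;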

View File

@@ -577,7 +577,7 @@ class IColumn;
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
M(Bool, optimize_use_implicit_projections, true, "Automatically choose implicit projections to perform SELECT query", 0) \
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
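Settings.h: the default of optimize_use_implicit_projections is switched back to true now that the analysis bug is fixed. It can still be disabled per query or per session; a small usage sketch (the table name events is a placeholder):

SELECT count() FROM events SETTINGS optimize_use_implicit_projections = 0;
SET optimize_use_implicit_projections = 0;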

View File

@@ -80,7 +80,6 @@ namespace SettingsChangesHistory
/// It's used to implement the `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},

View File

@@ -976,7 +976,15 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (node.name == "indexHint")
{
if (data.only_consts)
{
/// We need to collect constants inside `indexHint` for index analysis.
if (node.arguments)
{
for (const auto & arg : node.arguments->children)
visit(arg, data);
}
return;
}
/// Here we create a separate DAG for indexHint condition.
/// It will be used only for index analysis.
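ActionsVisitor.cpp: in only_consts mode the visitor now descends into the arguments of indexHint as well, so constants written inside indexHint are collected and become usable for index analysis (the added comment states exactly that). The reference test extended at the end of this commit exercises this; excerpt (table XXXX comes from that test):

CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1;
SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1;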

View File

@@ -8,7 +8,7 @@
namespace DB
{
MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag)
MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity)
{
using Parents = std::set<const ActionsDAG::Node *>;
std::unordered_map<const ActionsDAG::Node *, Parents> inner_parents;
@@ -75,7 +75,12 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG
}
/// A node from a found match may be nullptr.
/// It means that the node was visited, but no match was found.
frame.mapped_children.push_back(it->second.node);
if (it->second.monotonicity)
/// Ignore a match with monotonicity.
frame.mapped_children.push_back(nullptr);
else
frame.mapped_children.push_back(it->second.node);
}
if (frame.mapped_children.size() < frame.node->children.size())
@@ -182,7 +187,7 @@ MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG
}
}
if (!match.node && frame.node->function_base->hasInformationAboutMonotonicity())
if (!match.node && check_monotonicity && frame.node->function_base->hasInformationAboutMonotonicity())
{
size_t num_const_args = 0;
const ActionsDAG::Node * monotonic_child = nullptr;
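actionsDAGUtils.cpp, two related changes: a child that only matched through monotonicity is now propagated to its parent as nullptr, so the parent cannot be mistaken for an exact match, and the monotonicity lookup itself can be disabled through the new check_monotonicity parameter. The second table in the added test shows where this matters: t1 is ordered by sin(c0), and in the query below -c0 corresponds to c0 only through a monotonic negation, so the DAG node sin(-c0) must not be reported as an exact match of the sorting-key node sin(c0):

create table t1 (c0 Int32) engine = MergeTree order by sin(c0);
insert into t1 values (-1), (1);
select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1;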

View File

@@ -39,5 +39,5 @@ struct MatchedTrees
using Matches = std::unordered_map<const ActionsDAG::Node *, Match>;
};
MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag);
MatchedTrees::Matches matchTrees(const ActionsDAG & inner_dag, const ActionsDAG & outer_dag, bool check_monotonicity = true);
}

View File

@@ -287,7 +287,7 @@ ActionsDAGPtr analyzeAggregateProjection(
{
auto proj_index = buildDAGIndex(*info.before_aggregation);
MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag);
MatchedTrees::Matches matches = matchTrees(*info.before_aggregation, *query.dag, false /* check_monotonicity */);
// for (const auto & [node, match] : matches)
// {
@@ -497,6 +497,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
// LOG_TRACE(&Poco::Logger::get("optimizeUseProjections"), "Projection sample block 2 {}", block.dumpStructure());
// minmax_count_projection cannot be used when there is no data to process, because
// it will produce an incorrect result during constant aggregation.
// See https://github.com/ClickHouse/ClickHouse/issues/36728
if (block)
{
MinMaxProjectionCandidate minmax;
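optimizeUseAggregateProjection.cpp: the comment now records why minmax_count_projection is skipped when the sample block is empty: with no data to process, constant aggregation over the projection can yield a wrong result (issue #36728). A hedged sketch of the shape of the problem (the table name empty_t is a placeholder, not from this diff):

CREATE TABLE empty_t (x Int32) ENGINE = MergeTree ORDER BY x;
-- With no parts, this must be answered by the normal path, not by minmax_count_projection.
SELECT count(), min(x), max(x) FROM empty_t;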

View File

@@ -7031,7 +7031,9 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
max_added_blocks.get(),
query_context);
// minmax_count_projection should not be used when there is no data to process.
// minmax_count_projection cannot be used when there is no data to process, because
// it will produce an incorrect result during constant aggregation.
// See https://github.com/ClickHouse/ClickHouse/issues/36728
if (!query_info.minmax_count_projection_block)
return;

View File

@@ -298,6 +298,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context)
SelectQueryOptions{
type == ProjectionDescription::Type::Normal ? QueryProcessingStage::FetchColumns
: QueryProcessingStage::WithMergeableState}
.ignoreASTOptimizations()
.ignoreSettingConstraints())
.buildQueryPipeline();
builder.resize(1);
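ProjectionDescription.cpp: the pipeline that materializes a projection's block for a part is now built with ignoreASTOptimizations(). A plausible motivation, which is my assumption rather than something stated in this diff: AST-level rewrites could simplify an expression such as the GROUP BY -c0 key of the projection in the new test, while the data stored for the part must be computed from the definition exactly as written, e.g.:

-- Projection h must be materialized with GROUP BY -c0, not a rewritten key (illustration reusing the new test's table).
CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY ();
INSERT INTO t0(c0) VALUES (1);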

View File

@@ -0,0 +1,17 @@
DROP TABLE IF EXISTS t0;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t0 (c0 Int16, projection h (SELECT min(c0), max(c0), count() GROUP BY -c0)) ENGINE = MergeTree ORDER BY ();
INSERT INTO t0(c0) VALUES (1);
SELECT count() FROM t0 GROUP BY gcd(-sign(c0), -c0) SETTINGS optimize_use_implicit_projections = 1;
create table t1 (c0 Int32) engine = MergeTree order by sin(c0);
insert into t1 values (-1), (1);
select c0 from t1 order by sin(-c0) settings optimize_read_in_order=0;
select c0 from t1 order by sin(-c0) settings optimize_read_in_order=1;
DROP TABLE t0;
DROP TABLE t1;

View File

@@ -33,3 +33,8 @@ insert into XXXX select number*60, 0 from numbers(100000);
SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1;
100000
drop table XXXX;
CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1;
INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3);
SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1;
0
drop table XXXX;

View File

@@ -33,3 +33,11 @@ insert into XXXX select number*60, 0 from numbers(100000);
SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1;
drop table XXXX;
CREATE TABLE XXXX (p Nullable(Int64), k Decimal(76, 39)) ENGINE = MergeTree PARTITION BY toDate(p) ORDER BY k SETTINGS index_granularity = 1, allow_nullable_key = 1;
INSERT INTO XXXX FORMAT Values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3);
SELECT count() FROM XXXX WHERE indexHint(p = 1.) SETTINGS optimize_use_implicit_projections = 1;
drop table XXXX;