Merge pull request #64695 from ClickHouse/fix-early-const-folding-analyzer

Fix early constant folding for isNull/isNotNull and analyzer.
This commit is contained in:
Nikolai Kochetov 2024-06-11 08:35:57 +00:00 committed by GitHub
commit 68e4be0302
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 105 additions and 15 deletions

View File

@ -3,6 +3,7 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeVariant.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Core/Field.h>
@ -174,4 +175,9 @@ DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
}
/// Tells whether `type` is one of the kinds checked below:
/// Nullable, LowCardinality(Nullable), Dynamic or Variant.
bool canContainNull(const IDataType & type)
{
    /// Explicitly nullable wrappers are checked first.
    if (type.isNullable() || type.isLowCardinalityNullable())
        return true;

    /// Otherwise only Dynamic and Variant types qualify.
    return isDynamic(type) || isVariant(type);
}
}

View File

@ -62,4 +62,6 @@ DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
/// Returns true if the type is Nullable, LowCardinality(Nullable), Dynamic or Variant.
bool canContainNull(const IDataType & type);
}

View File

@ -29,6 +29,18 @@ public:
return name;
}
/// Attempts to fold the call into a constant by looking only at the argument type:
///  - type that is only NULL        -> constant 0;
///  - type for which canContainNull -> nullptr (no constant result);
///  - any other type                -> constant 1.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;

    if (!argument_type->onlyNull())
    {
        /// The type alone does not decide the answer here, so no constant is produced.
        if (canContainNull(*argument_type))
            return nullptr;

        return result_type->createColumnConst(1, UInt8(1));
    }

    return result_type->createColumnConst(1, UInt8(0));
}
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -31,6 +31,18 @@ public:
return name;
}
/// Attempts to fold the call into a constant by looking only at the argument type:
///  - type that is only NULL        -> constant 1;
///  - type for which canContainNull -> nullptr (no constant result);
///  - any other type                -> constant 0.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;

    if (!argument_type->onlyNull())
    {
        /// The type alone does not decide the answer here, so no constant is produced.
        if (canContainNull(*argument_type))
            return nullptr;

        return result_type->createColumnConst(1, UInt8(0));
    }

    return result_type->createColumnConst(1, UInt8(1));
}
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

View File

@ -2,6 +2,7 @@
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
@ -23,6 +24,15 @@ public:
return name;
}
/// Always yields a constant derived from the argument type alone:
/// 1 when the type is only NULL or canContainNull holds for it, 0 otherwise.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;
    const bool may_hold_null = argument_type->onlyNull() || canContainNull(*argument_type);

    if (may_hold_null)
        return result_type->createColumnConst(1, UInt8(1));

    return result_type->createColumnConst(1, UInt8(0));
}
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }

View File

@ -1,6 +1,7 @@
#include <Planner/PlannerExpressionAnalysis.h>
#include <Columns/ColumnNullable.h>
#include <Columns/FilterDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
@ -37,7 +38,7 @@ namespace
* Actions before filter are added into the actions chain.
* It is the client's responsibility to update filter analysis result if filter column must be removed after chain is finalized.
*/
FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node,
std::optional<FilterAnalysisResult> analyzeFilter(const QueryTreeNodePtr & filter_expression_node,
const ColumnsWithTypeAndName & input_columns,
const PlannerContextPtr & planner_context,
ActionsChain & actions_chain)
@ -45,7 +46,11 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no
FilterAnalysisResult result;
result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context);
result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name;
const auto * output = result.filter_actions->getOutputs().at(0);
if (output->column && ConstantFilterDescription(*output->column).always_true)
return {};
result.filter_column_name = output->result_name;
actions_chain.addStep(std::make_unique<ActionsChainStep>(result.filter_actions));
return result;
@ -534,8 +539,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
if (query_node.hasWhere())
{
where_analysis_result_optional = analyzeFilter(query_node.getWhere(), current_output_columns, planner_context, actions_chain);
where_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
if (where_analysis_result_optional)
{
where_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
}
}
auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain);
@ -548,8 +556,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
if (query_node.hasHaving())
{
having_analysis_result_optional = analyzeFilter(query_node.getHaving(), current_output_columns, planner_context, actions_chain);
having_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
if (having_analysis_result_optional)
{
having_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
}
}
auto window_analysis_result_optional = analyzeWindow(query_tree, current_output_columns, planner_context, actions_chain);
@ -562,8 +573,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
if (query_node.hasQualify())
{
qualify_analysis_result_optional = analyzeFilter(query_node.getQualify(), current_output_columns, planner_context, actions_chain);
qualify_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
if (qualify_analysis_result_optional)
{
qualify_action_step_index_optional = actions_chain.getLastStepIndex();
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
}
}
auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain);

View File

@ -368,17 +368,21 @@ TEST(TransformQueryForExternalDatabase, Null)
check(state, 1, {"field"},
"SELECT field FROM table WHERE field IS NULL",
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)");
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)",
R"(SELECT "field" FROM "test"."table" WHERE 1 = 0)");
check(state, 1, {"field"},
"SELECT field FROM table WHERE field IS NOT NULL",
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)");
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)",
R"(SELECT "field" FROM "test"."table")");
check(state, 1, {"field"},
"SELECT field FROM table WHERE isNull(field)",
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)");
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NULL)",
R"(SELECT "field" FROM "test"."table" WHERE 1 = 0)");
check(state, 1, {"field"},
"SELECT field FROM table WHERE isNotNull(field)",
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)");
R"(SELECT "field" FROM "test"."table" WHERE "field" IS NOT NULL)",
R"(SELECT "field" FROM "test"."table")");
}
TEST(TransformQueryForExternalDatabase, ToDate)

View File

@ -15,6 +15,6 @@ SELECT isNull(t0.c0) OR COUNT('\n?pVa')
FROM t0
GROUP BY t0.c0
HAVING isNull(isNull(t0.c0))
SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0;
SETTINGS aggregate_functions_null_for_empty = 1, enable_optimize_predicate_expression = 0 format Null;
drop table if exists t0;

View File

@ -206,7 +206,7 @@ select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative
select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null < -500);
596 -1099 -501
select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null is null);
1000 499500
0 0
select count(), min(negative_or_null), max(negative_or_null) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where (negative_or_null is null);
0 0 0
select count(), sum(number) from file('02892.orc', ORC, 'number UInt64, negative_or_null Int64') where indexHint(negative_or_null in (0, -1, -10, -100, -1000));

View File

@ -0,0 +1 @@
ReadFromPreparedSource (Optimized trivial count)

View File

@ -0,0 +1,30 @@
-- Test table: none of the columns is declared Nullable; `test_name` is LowCardinality(String).
CREATE TABLE checks
(
`pull_request_number` UInt32,
`commit_sha` LowCardinality(String),
`check_name` LowCardinality(String),
`check_status` LowCardinality(String),
`check_duration_ms` UInt64,
`check_start_time` DateTime,
`test_name` LowCardinality(String),
`test_status` LowCardinality(String),
`test_duration_ms` UInt64,
`report_url` String,
`pull_request_url` String,
`commit_url` String,
`task_url` String,
`base_ref` String,
`base_repo` String,
`head_ref` String,
`head_repo` String,
`test_context_raw` String,
`instance_type` LowCardinality(String),
`instance_id` String,
`date` Date MATERIALIZED toDate(check_start_time)
)
ENGINE = MergeTree ORDER BY (date, pull_request_number, commit_sha, check_name, test_name, check_start_time);
-- Insert a single randomly generated row so the table is not empty.
insert into checks select * from generateRandom() limit 1;
-- Print only the EXPLAIN lines mentioning ReadFromPreparedSource for a count(1) filtered by
-- `test_name IS NOT NULL`, with the analyzer enabled and parallel reading from replicas disabled.
-- NOTE(review): presumably verifies that the predicate on a non-Nullable column is folded away
-- so the count is answered without reading data — confirm against the reference output.
select trimLeft(explain) from (explain SELECT count(1) FROM checks WHERE test_name IS NOT NULL) where explain like '%ReadFromPreparedSource%' SETTINGS allow_experimental_analyzer = 1, allow_experimental_parallel_reading_from_replicas = 0;