Merge pull request #25333 from MaxWk/exact-rows-before-limit

Add setting exact_rows_before_limit. When enabled, the rows_before_limit_at_least statistic reports an exact value, at the cost of reading all data before the limit.
Commit c489ad7d71 by Alexey Milovidov, 2022-07-24 02:30:31 +03:00 (committed by GitHub)
5 changed files with 97 additions and 3 deletions

src/Core/Settings.h

@@ -769,6 +769,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
     M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
     M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \
     \
+    M(Bool, exact_rows_before_limit, false, "When enabled, ClickHouse will provide exact value for rows_before_limit_at_least statistic, but with the cost that the data before limit will have to be read completely", 0) \
     M(UInt64, cross_to_inner_join_rewrite, 1, "Use inner join instead of comma/cross join if there're joining expressions in the WHERE section. Values: 0 - no rewrite, 1 - apply if possible for comma/cross, 2 - force rewrite all comma joins, cross - if possible", 0) \
     \
     M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \

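For orientation, a minimal usage sketch (not part of the commit): rows_before_limit_at_least is reported by the JSON-family output formats, and the new setting trades extra reading for an exact value.

-- Hedged sketch, not from this commit: with the setting enabled the engine
-- reads all data before the limit, so the statistic should be exact (10000 here);
-- with it disabled, the reported value may be only a lower bound.
select number from numbers(10000) limit 1 format JSON settings exact_rows_before_limit = 1;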
src/Interpreters/InterpreterSelectQuery.cpp

@@ -2712,11 +2712,14 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not_skip_offset)
             limit_offset = 0;
         }
-        auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset);
+        const Settings & settings = context->getSettingsRef();
+        auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset, settings.exact_rows_before_limit);
         if (do_not_skip_offset)
             limit->setStepDescription("preliminary LIMIT (with OFFSET)");
         else
             limit->setStepDescription("preliminary LIMIT (without OFFSET)");
         query_plan.addStep(std::move(limit));
     }
 }
@@ -2778,7 +2781,8 @@ void InterpreterSelectQuery::executeLimit(QueryPlan & query_plan)
      * if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
      * otherwise TOTALS is counted according to incomplete data.
      */
-    bool always_read_till_end = false;
+    const Settings & settings = context->getSettingsRef();
+    bool always_read_till_end = settings.exact_rows_before_limit;
     if (query.group_by_with_totals && !query.orderBy())
         always_read_till_end = true;

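The comment in the hunk above describes the pre-existing special case that this change generalizes; a hedged SQL illustration, using the test table defined later in this commit:

-- WITH TOTALS and no ORDER BY already forced reading to the end, so that
-- TOTALS is computed over complete rather than partial data:
select f1, count() from test_rows_compact_part group by f1 with totals limit 1;
-- The new setting requests the same read-till-end behavior unconditionally.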
src/Interpreters/InterpreterSelectWithUnionQuery.cpp

@@ -344,7 +344,7 @@ void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
     {
         if (settings.limit > 0)
         {
-            auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), settings.limit, settings.offset);
+            auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), settings.limit, settings.offset, settings.exact_rows_before_limit);
             limit->setStepDescription("LIMIT OFFSET for SETTINGS");
             query_plan.addStep(std::move(limit));
         }

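This code path handles LIMIT and OFFSET supplied as settings rather than as query clauses; a hedged sketch of a query that exercises it:

-- `limit` and `offset` given as settings are applied by the LimitStep built
-- in buildQueryPlan above, which now also honors exact_rows_before_limit:
select number from numbers(100) settings limit = 5, offset = 10, exact_rows_before_limit = 1;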
New file: test reference (expected JSONCompact output for the queries below)

@@ -0,0 +1,72 @@
{
"meta":
[
{
"name": "0",
"type": "UInt8"
}
],
"data":
[
[0]
],
"rows": 1,
"rows_before_limit_at_least": 10000
}
{
"meta":
[
{
"name": "0",
"type": "UInt8"
}
],
"data":
[
[0]
],
"rows": 1,
"rows_before_limit_at_least": 20000
}
{
"meta":
[
{
"name": "0",
"type": "UInt8"
}
],
"data":
[
[0]
],
"rows": 1,
"rows_before_limit_at_least": 1
}
{
"meta":
[
{
"name": "0",
"type": "UInt8"
}
],
"data":
[
[0]
],
"rows": 1,
"rows_before_limit_at_least": 20000
}

New file: test queries (SQL)

@@ -0,0 +1,17 @@
-- Tags: no-parallel

drop table if exists test_rows_compact_part;
create table test_rows_compact_part(f1 int, f2 int) engine = MergeTree partition by f1 order by f2 settings min_bytes_for_wide_part = 10485760;
insert into test_rows_compact_part select 0, arrayJoin(range(10000));
insert into test_rows_compact_part select 1, arrayJoin(range(10000));
select 0 from test_rows_compact_part limit 1 FORMAT JSONCompact settings exact_rows_before_limit = 0, output_format_write_statistics = 0;
select 0 from test_rows_compact_part limit 1 FORMAT JSONCompact settings exact_rows_before_limit = 1, output_format_write_statistics = 0;
drop table if exists test_rows_compact_part;

drop table if exists test_rows_wide_part;
create table test_rows_wide_part(f1 int, f2 int) engine = MergeTree partition by f1 order by f2 settings min_bytes_for_wide_part = 0;
insert into test_rows_wide_part select 0, arrayJoin(range(10000));
insert into test_rows_wide_part select 1, arrayJoin(range(10000));
select 0 from test_rows_wide_part limit 1 FORMAT JSONCompact settings exact_rows_before_limit = 0, output_format_write_statistics = 0;
select 0 from test_rows_wide_part limit 1 FORMAT JSONCompact settings exact_rows_before_limit = 1, output_format_write_statistics = 0;
drop table if exists test_rows_wide_part;
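The four JSON blocks in the reference file correspond, in order, to the four SELECTs above: compact part with the setting off (lower bound 10000) and on (exact 20000), then wide part with the setting off (lower bound 1) and on (exact 20000). The two table definitions differ only in min_bytes_for_wide_part, which selects the part format; a hedged way to confirm the format actually used:

-- Not part of the commit: part_type in system.parts shows Compact or Wide
-- for each active data part of the test tables.
select table, part_type from system.parts where table like 'test_rows_%' and active;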