Merge pull request #60548 from kitaisreal/merge-tree-read-split-ranges-into-intersecting-and-non-intersecting-fault-injection

MergeTree read split ranges into intersecting and non intersecting injection
This commit is contained in:
Nikolai Kochetov 2024-03-22 11:52:14 +01:00 committed by GitHub
commit 0f60596f5b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
45 changed files with 217 additions and 74 deletions

View File

@ -287,6 +287,7 @@ class IColumn;
M(UInt64, read_backoff_min_concurrency, 1, "Settings to try keeping the minimal number of threads in case of slow reads.", 0) \
\
M(Float, memory_tracker_fault_probability, 0., "For testing of `exception safety` - throw an exception every time you allocate memory with the specified probability.", 0) \
M(Float, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability.", 0) \
\
M(Bool, enable_http_compression, false, "Compress the result if the client over HTTP said that it understands data compressed by gzip, deflate, zstd, br, lz4, bz2, xz.", 0) \
M(Int64, http_zlib_compression_level, 3, "Compression level - used if the client on HTTP said that it understands data compressed by gzip or deflate.", 0) \

View File

@ -103,6 +103,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"},
{"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"},
{"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"},
{"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability", 0.0, 0.0, "For testing of `PartsSplitter` - split read ranges into intersecting and non intersecting every time you read from MergeTree with the specified probability."},
}},
{"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
{"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},

View File

@ -34,6 +34,12 @@ std::string toString(const Values & value)
return fmt::format("({})", fmt::join(value, ", "));
}
/** We rely that FieldVisitorAccurateLess will have strict weak ordering for any Field values including
* NaN, Null and containers (Array, Tuple, Map) that contain NaN or Null. But right now it does not properly
* support NaN and Nulls inside containers, because it uses Field operator< or accurate::lessOp for comparison
* that compares Nulls and NaNs differently than FieldVisitorAccurateLess.
* TODO: Update Field operator< to compare NaNs and Nulls the same way as FieldVisitorAccurateLess.
*/
bool isSafePrimaryDataKeyType(const IDataType & data_type)
{
auto type_id = data_type.getTypeId();
@ -316,12 +322,12 @@ struct PartRangeIndex
bool operator==(const PartRangeIndex & other) const
{
return part_index == other.part_index && range.begin == other.range.begin && range.end == other.range.end;
return std::tie(part_index, range.begin, range.end) == std::tie(other.part_index, other.range.begin, other.range.end);
}
bool operator<(const PartRangeIndex & other) const
{
return part_index < other.part_index && range.begin < other.range.begin && range.end < other.range.end;
return std::tie(part_index, range.begin, range.end) < std::tie(other.part_index, other.range.begin, other.range.end);
}
size_t part_index;
@ -786,7 +792,7 @@ ASTs buildFilters(const KeyDescription & primary_key, const std::vector<Values>
const auto & type = primary_key.data_types.at(i);
// PK may contain functions of the table columns, so we need the actual PK AST with all expressions it contains.
auto pk_ast = primary_key.expression_list_ast->children.at(i);
auto pk_ast = primary_key.expression_list_ast->children.at(i)->clone();
// If PK is nullable, prepend a null mask column for > comparison.
// Also transform the AST into assumeNotNull(pk) so that the result type is not-nullable.

View File

@ -766,6 +766,82 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreams(RangesInDataParts && parts_
auto read_type = is_parallel_reading_from_replicas ? ReadType::ParallelReplicas : ReadType::Default;
double read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = settings.merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability;
std::bernoulli_distribution fault(read_split_ranges_into_intersecting_and_non_intersecting_injection_probability);
if (read_type != ReadType::ParallelReplicas &&
num_streams > 1 &&
read_split_ranges_into_intersecting_and_non_intersecting_injection_probability > 0.0 &&
fault(thread_local_rng) &&
!isQueryWithFinal() &&
data.merging_params.is_deleted_column.empty() &&
!prewhere_info)
{
NameSet column_names_set(column_names.begin(), column_names.end());
Names in_order_column_names_to_read(column_names);
/// Add columns needed to calculate the sorting expression
for (const auto & column_name : metadata_for_reading->getColumnsRequiredForSortingKey())
{
if (column_names_set.contains(column_name))
continue;
in_order_column_names_to_read.push_back(column_name);
column_names_set.insert(column_name);
}
auto in_order_reading_step_getter = [this, &in_order_column_names_to_read, &info](auto parts)
{
return this->read(
std::move(parts),
in_order_column_names_to_read,
ReadType::InOrder,
1 /* num_streams */,
0 /* min_marks_for_concurrent_read */,
info.use_uncompressed_cache);
};
auto sorting_expr = std::make_shared<ExpressionActions>(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone());
SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey(
metadata_for_reading->getPrimaryKey(),
std::move(sorting_expr),
std::move(parts_with_ranges),
num_streams,
context,
std::move(in_order_reading_step_getter),
true /*split_parts_ranges_into_intersecting_and_non_intersecting_final*/,
true /*split_intersecting_parts_ranges_into_layers*/);
auto merging_pipes = std::move(split_ranges_result.merging_pipes);
auto non_intersecting_parts_ranges_read_pipe = read(std::move(split_ranges_result.non_intersecting_parts_ranges),
column_names,
read_type,
num_streams,
info.min_marks_for_concurrent_read,
info.use_uncompressed_cache);
if (merging_pipes.empty())
return non_intersecting_parts_ranges_read_pipe;
Pipes pipes;
pipes.resize(2);
pipes[0] = Pipe::unitePipes(std::move(merging_pipes));
pipes[1] = std::move(non_intersecting_parts_ranges_read_pipe);
auto conversion_action = ActionsDAG::makeConvertingActions(
pipes[0].getHeader().getColumnsWithTypeAndName(),
pipes[1].getHeader().getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name);
pipes[0].addSimpleTransform(
[conversion_action](const Block & header)
{
auto converting_expr = std::make_shared<ExpressionActions>(conversion_action);
return std::make_shared<ExpressionTransform>(header, converting_expr);
});
return Pipe::unitePipes(std::move(pipes));
}
return read(std::move(parts_with_ranges),
column_names,
read_type,

View File

@ -52,6 +52,11 @@ def get_options(i: int, upgrade_check: bool) -> str:
if i % 5 == 1:
client_options.append("memory_tracker_fault_probability=0.001")
if i % 5 == 1:
client_options.append(
"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.05"
)
if i % 2 == 1 and not upgrade_check:
client_options.append("group_by_use_nulls=1")

View File

@ -843,6 +843,9 @@ class SettingsRandomizer:
"prefer_warmed_unmerged_parts_seconds": lambda: random.randint(0, 10),
"use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7,
"page_cache_inject_eviction": lambda: random.random() < 0.5,
"merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": lambda: round(
random.random(), 2
),
}
@staticmethod

View File

@ -7,11 +7,11 @@ INSERT INTO sorted (x) SELECT intDiv(number, 100000) AS x FROM system.numbers LI
SET max_threads = 1;
SELECT count() FROM sorted;
SELECT DISTINCT x FROM sorted;
SELECT x FROM (SELECT DISTINCT x FROM sorted) ORDER BY x;
INSERT INTO sorted (x) SELECT (intHash64(number) % 1000 = 0 ? 999 : intDiv(number, 100000)) AS x FROM system.numbers LIMIT 1000000;
SELECT count() FROM sorted;
SELECT DISTINCT x FROM sorted;
SELECT x FROM (SELECT DISTINCT x FROM sorted) ORDER BY x;
DROP TABLE sorted;

View File

@ -1,8 +1,10 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS index_for_like;
set allow_deprecated_syntax_for_merge_tree=1;
CREATE TABLE index_for_like (s String, d Date DEFAULT today()) ENGINE = MergeTree(d, (s, d), 1);
INSERT INTO index_for_like (s) VALUES ('Hello'), ('Hello, World'), ('Hello, World 1'), ('Hello 1'), ('Goodbye'), ('Goodbye, World'), ('Goodbye 1'), ('Goodbye, World 1');
INSERT INTO index_for_like (s) VALUES ('Hello'), ('Hello, World'), ('Hello, World 1'), ('Hello 1'), ('Goodbye'), ('Goodbye, World'), ('Goodbye 1'), ('Goodbye, World 1');
SET max_rows_to_read = 3;
SELECT s FROM index_for_like WHERE s LIKE 'Hello, World%';

View File

@ -20,7 +20,7 @@ SETTINGS index_granularity = 1;
INSERT INTO pk_in_tuple_perf SELECT number, number * 10 FROM numbers(100);
EOF
query="SELECT count() FROM pk_in_tuple_perf WHERE (v, u) IN ((2, 10), (2, 20))"
query="SELECT count() FROM pk_in_tuple_perf WHERE (v, u) IN ((2, 10), (2, 20)) SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0"
$CLICKHOUSE_CLIENT --query "$query"
$CLICKHOUSE_CLIENT --query "$query FORMAT JSON" | grep "rows_read"
@ -40,7 +40,7 @@ SETTINGS index_granularity = 1;
INSERT INTO pk_in_tuple_perf_non_const SELECT today() - number, number FROM numbers(100);
EOF
query="SELECT count() FROM pk_in_tuple_perf_non_const WHERE (u, d) IN ((0, today()), (1, today()))"
query="SELECT count() FROM pk_in_tuple_perf_non_const WHERE (u, d) IN ((0, today()), (1, today())) SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0"
$CLICKHOUSE_CLIENT --query "$query"
$CLICKHOUSE_CLIENT --query "$query FORMAT JSON" | grep "rows_read"

View File

@ -31,31 +31,31 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO minmax_idx VALUES
(8, 1, 2),
(9, 1, 2)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="
ALTER TABLE minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1 SETTINGS mutations_sync = 2;"
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx IN PARTITION 1 SETTINGS mutations_sync = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx IN PARTITION 2 SETTINGS mutations_sync = 2"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx CLEAR INDEX idx IN PARTITION 1 SETTINGS mutations_sync = 2"
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx CLEAR INDEX idx IN PARTITION 2 SETTINGS mutations_sync = 2"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx SETTINGS mutations_sync = 2"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="DROP TABLE minmax_idx"

View File

@ -32,17 +32,17 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO minmax_idx VALUES
(8, 1, 2),
(9, 1, 2)"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" # Returns 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read" # Returns 4
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx CLEAR INDEX idx IN PARTITION 1;" --mutations_sync=1
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" # Returns 6
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read" # Returns 6
$CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;" --mutations_sync=1
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read" # Returns 4
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read" # Returns 4
$CLICKHOUSE_CLIENT --query="DROP TABLE minmax_idx"

View File

@ -1,4 +1,5 @@
SET allow_suspicious_low_cardinality_types=1;
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS single_column_bloom_filter;

View File

@ -32,9 +32,9 @@ $CLICKHOUSE_CLIENT --query="insert into lowString (a, b) select top 100000 toStr
$CLICKHOUSE_CLIENT --query="insert into string (a, b) select top 100000 toString(number), today() from system.numbers"
$CLICKHOUSE_CLIENT --query="select count() from lowString where a in ('1', '2') FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="select count() from lowString where a in ('1', '2') SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="select count() from string where a in ('1', '2') FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="select count() from string where a in ('1', '2') SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="DROP TABLE lowString;"
$CLICKHOUSE_CLIENT --query="DROP TABLE string;"

View File

@ -49,18 +49,18 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO indices_mutaions1 VALUES
$CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA indices_mutaions2"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE indices_mutaions1 CLEAR INDEX idx IN PARTITION 1;" --replication_alter_partitions_sync=2 --mutations_sync=2
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="ALTER TABLE indices_mutaions1 MATERIALIZE INDEX idx IN PARTITION 1;" --replication_alter_partitions_sync=2 --mutations_sync=2
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM indices_mutaions2 WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON;" | grep "rows_read"
$CLICKHOUSE_CLIENT --query="DROP TABLE indices_mutaions1"
$CLICKHOUSE_CLIENT --query="DROP TABLE indices_mutaions2"

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS test1;
DROP TABLE IF EXISTS test2;
DROP TABLE IF EXISTS test3;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS minmax_compact;
CREATE TABLE minmax_compact

View File

@ -1,4 +1,4 @@
-- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-merge-tree-settings
-- Tags: no-debug, no-parallel, long, no-s3-storage, no-random-settings, no-random-merge-tree-settings
DROP TABLE IF EXISTS table_with_single_pk;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS test1;
DROP TABLE IF EXISTS test2;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS test;
CREATE TABLE test (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS index_granularity = 1000, index_granularity_bytes = '10Mi';
INSERT INTO test SELECT * FROM numbers(1000000);

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, no-polymorphic-parts, no-random-merge-tree-settings, no-debug
# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug
# Description of test result:
# Test the correctness of the partition pruning

View File

@ -1,5 +1,7 @@
-- Tags: global
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists xp;
drop table if exists xp_d;

View File

@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1"
SETTINGS="SET convert_query_to_cnf = 1; SET optimize_using_constraints = 1; SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1; SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;"
$CLICKHOUSE_CLIENT -n --query="
$SETTINGS;

View File

@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CURDIR"/../shell_config.sh
# Number of read rows depends on max_bytes_before_external_group_by.
CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --max_bytes_before_external_group_by 0"
CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --max_bytes_before_external_group_by 0 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test_agg_proj (x Int32, y Int32, PROJECTION x_plus_y (SELECT sum(x - y), argMax(x, y) group by x + y)) ENGINE = MergeTree ORDER BY tuple() settings index_granularity = 1"
$CLICKHOUSE_CLIENT -q "insert into test_agg_proj select intDiv(number, 2), -intDiv(number,3) - 1 from numbers(100)"

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists t;
create table t (i int, j int, k int, projection p (select * order by j)) engine MergeTree order by i settings index_granularity = 1;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists x;
create table x (i int, j int) engine MergeTree partition by i order by j settings index_granularity = 1;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS t_max_rows_to_read;
CREATE TABLE t_max_rows_to_read (a UInt64)

View File

@ -1,5 +1,7 @@
-- Tags: no-s3-storage
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists data_02233;
create table data_02233 (parent_key Int, child_key Int, value Int) engine=MergeTree() order by parent_key;

View File

@ -1,6 +1,8 @@
-- Tags: no-parallel, no-fasttest
-- Tag no-fasttest: Depends on S3
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
-- { echo }
drop table if exists test_02302;
create table test_02302 (a UInt64) engine = S3(s3_conn, filename='test_02302_{_partition_id}', format=Parquet) partition by a;

View File

@ -15,11 +15,13 @@ function check_refcnt_for_table()
local query_id
query_id="$table-$(random_str 10)"
SETTINGS="--format Null --max_threads 1 --max_block_size 1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability 0.0"
# Notes:
# - query may sleep 1*(200/4)=50 seconds maximum, it is enough to check system.parts
# - "part = 1" condition should prune all parts except first
# - max_block_size=1 with index_granularity=1 will allow to cancel the query earlier
$CLICKHOUSE_CLIENT --format Null --max_threads 1 --max_block_size 1 --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" &
$CLICKHOUSE_CLIENT $SETTINGS --query_id "$query_id" -q "select sleepEachRow(1) from $table where part = 1" &
PID=$!
# wait for query to be started

View File

@ -2,6 +2,7 @@
-- produces different pipeline if enabled
set enable_memory_bound_merging_of_aggregation_results = 0;
set merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
set max_threads = 16;
set prefer_localhost_replica = 1;

View File

@ -1,5 +1,6 @@
SET allow_experimental_inverted_index = 1;
SET log_queries = 1;
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
----------------------------------------------------
SELECT 'Test inverted(2)';
@ -23,7 +24,7 @@ SELECT * FROM tab WHERE s == 'Alick a01';
-- check the query only read 1 granules (2 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==2 from system.query_log
SELECT read_rows==2 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s == \'Alick a01\';')
@ -36,7 +37,7 @@ SELECT * FROM tab WHERE s LIKE '%01%' ORDER BY k;
-- check the query only read 2 granules (4 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==4 from system.query_log
SELECT read_rows==4 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s LIKE \'%01%\' ORDER BY k;')
@ -49,11 +50,11 @@ SELECT * FROM tab WHERE hasToken(s, 'Click') ORDER BY k;
-- check the query only read 4 granules (8 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==8 from system.query_log
SELECT read_rows==8 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE hasToken(s, \'Click\') ORDER BY k;')
AND type='QueryFinish'
AND type='QueryFinish'
AND result_rows==4
LIMIT 1;
@ -76,11 +77,11 @@ SELECT * FROM tab_x WHERE hasToken(s, 'Alick') ORDER BY k;
-- check the query only read 4 granules (8 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==8 from system.query_log
SELECT read_rows==8 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE hasToken(s, \'Alick\');')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE hasToken(s, \'Alick\');')
AND type='QueryFinish'
AND result_rows==4
LIMIT 1;
@ -89,24 +90,24 @@ SELECT * FROM tab_x WHERE s IN ('Alick a01', 'Alick a06') ORDER BY k;
-- check the query only read 2 granules (4 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==4 from system.query_log
SELECT read_rows==4 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE s IN (\'Alick a01\', \'Alick a06\') ORDER BY k;')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE s IN (\'Alick a01\', \'Alick a06\') ORDER BY k;')
AND type='QueryFinish'
AND result_rows==2
LIMIT 1;
-- search inverted index with multiSearch
-- search inverted index with multiSearch
SELECT * FROM tab_x WHERE multiSearchAny(s, ['a01', 'b01']) ORDER BY k;
-- check the query only read 2 granules (4 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==4 from system.query_log
SELECT read_rows==4 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE multiSearchAny(s, [\'a01\', \'b01\']) ORDER BY k;')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab_x WHERE multiSearchAny(s, [\'a01\', \'b01\']) ORDER BY k;')
AND type='QueryFinish'
AND result_rows==2
LIMIT 1;
@ -129,11 +130,11 @@ SELECT * FROM tab WHERE has(s, 'Click a03') ORDER BY k;
-- check the query must read all 10 granules (20 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==2 from system.query_log
SELECT read_rows==2 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE has(s, \'Click a03\') ORDER BY k;')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE has(s, \'Click a03\') ORDER BY k;')
AND type='QueryFinish'
AND result_rows==1
LIMIT 1;
@ -156,11 +157,11 @@ SELECT * FROM tab WHERE mapContains(s, 'Click') ORDER BY k;
-- check the query must read all 4 granules (8 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==8 from system.query_log
SELECT read_rows==8 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE mapContains(s, \'Click\') ORDER BY k;')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE mapContains(s, \'Click\') ORDER BY k;')
AND type='QueryFinish'
AND result_rows==4
LIMIT 1;
@ -169,11 +170,11 @@ SELECT * FROM tab WHERE s['Click'] = 'Click a03';
-- check the query must read all 4 granules (8 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==8 from system.query_log
SELECT read_rows==8 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s[\'Click\'] = \'Click a03\';')
AND type='QueryFinish'
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s[\'Click\'] = \'Click a03\';')
AND type='QueryFinish'
AND result_rows==1
LIMIT 1;
@ -199,10 +200,10 @@ SELECT * FROM tab WHERE s LIKE '%01%' ORDER BY k;
-- check the query only read 3 granules (6 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==6 from system.query_log
WHERE query_kind ='Select'
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s LIKE \'%01%\' ORDER BY k;')
AND type='QueryFinish'
AND type='QueryFinish'
AND result_rows==3
LIMIT 1;
@ -226,11 +227,11 @@ SELECT * FROM tab WHERE s LIKE '%你好%' ORDER BY k;
-- check the query only read 1 granule (2 rows total; each granule has 2 rows)
SYSTEM FLUSH LOGS;
SELECT read_rows==2 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s LIKE \'%%\' ORDER BY k;')
AND type='QueryFinish'
SELECT read_rows==2 from system.query_log
WHERE query_kind ='Select'
AND current_database = currentDatabase()
AND endsWith(trimRight(query), 'SELECT * FROM tab WHERE s LIKE \'%%\' ORDER BY k;')
AND type='QueryFinish'
AND result_rows==1
LIMIT 1;

View File

@ -15,14 +15,14 @@ FIND_SORTMODE="$GREP_SORTMODE | $TRIM_LEADING_SPACES"
function explain_sorting {
echo "-- QUERY: "$1
$CLICKHOUSE_CLIENT -nq "$1" | eval $FIND_SORTING
$CLICKHOUSE_CLIENT --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTING
}
function explain_sortmode {
echo "-- QUERY: "$1
$CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -nq "$1" | eval $FIND_SORTMODE
$CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTMODE
echo "-- QUERY (analyzer): "$1
$CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -nq "$1" | eval $FIND_SORTMODE
$CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTMODE
}
$CLICKHOUSE_CLIENT -q "drop table if exists optimize_sorting sync"

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
-- #40014
CREATE TABLE m0 (id UInt64) ENGINE=MergeTree ORDER BY id SETTINGS index_granularity = 1, ratio_of_defaults_for_sparse_serialization = 1.0;
INSERT INTO m0 SELECT number FROM numbers(10);

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS constCondOptimization;
CREATE TABLE constCondOptimization

View File

@ -1,5 +1,7 @@
-- Tags: no-random-merge-tree-settings
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists t;
create table t (x UInt64) engine = MergeTree order by x;
insert into t select number from numbers_mt(10000000) settings max_insert_threads=8;

View File

@ -15,9 +15,11 @@ ${CLICKHOUSE_CLIENT} --query "CREATE TABLE tab (a UInt64) ENGINE=MergeTree() ORD
${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (1) (2) (3)"
${CLICKHOUSE_CLIENT} --query "INSERT INTO tab VALUES (3) (4) (5)"
SETTINGS="SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0"
# Verify that the first query does two aggregations and the second query zero aggregations. Since query cache is currently not integrated
# with EXPLAIN PLAN, we need need to check the logs.
${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0" 2>&1 | grep "Aggregated. " | wc -l
${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab SETTINGS use_query_cache=1, max_threads=1, allow_experimental_analyzer=0" 2>&1 | grep "Aggregated. " | wc -l
# with EXPLAIN PLAN, we need to check the logs.
${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l
${CLICKHOUSE_CLIENT} --send_logs_level=trace --query "SELECT count(a) / (SELECT sum(a) FROM tab) FROM tab $SETTINGS" 2>&1 | grep "Aggregated. " | wc -l
${CLICKHOUSE_CLIENT} --query "SYSTEM DROP QUERY CACHE"

View File

@ -1,5 +1,7 @@
-- Tags: long, no-s3-storage
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
set max_threads = 16;
set allow_aggregate_partitions_independently = 1;
set force_aggregate_partitions_independently = 1;

View File

@ -17,6 +17,6 @@ $CLICKHOUSE_CLIENT -nm -q "
# instead of "last" value, hence you cannot simply append another
# --send_logs_level here.
CLICKHOUSE_CLIENT_CLEAN=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=test/g')
$CLICKHOUSE_CLIENT_CLEAN -q "select * from data" |& grep -o -e '<Unknown>.*' -e '<Test>.*'
$CLICKHOUSE_CLIENT_CLEAN -q "select * from data SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" |& grep -o -e '<Unknown>.*' -e '<Test>.*'
$CLICKHOUSE_CLIENT -q "drop table data"

View File

@ -1,5 +1,7 @@
-- Tags: no-s3-storage
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists t;
create table t(a UInt64) engine=MergeTree order by tuple();

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
CREATE TABLE test (id UInt64, `amax` AggregateFunction(argMax, String, DateTime))
ENGINE=MergeTree()
ORDER BY id

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS t_sparse_distinct;
CREATE TABLE t_sparse_distinct (id UInt32, v String)

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS test;
CREATE TABLE test (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY mortonEncode(x, y) SETTINGS index_granularity = 8192, index_granularity_bytes = '1Mi';

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS ttl_group_by_bug;
CREATE TABLE ttl_group_by_bug

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
drop table if exists a;
create table a (i int) engine MergeTree order by i settings index_granularity = 2;

View File

@ -1,3 +1,5 @@
SET merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;
DROP TABLE IF EXISTS test_empty;
CREATE TABLE test_empty (a Array(Int64)) engine=MergeTree ORDER BY a;
INSERT INTO test_empty VALUES ([]);