From d1c5163a38b7ae2baf5b55cf7282b49025103333 Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy
Date: Mon, 4 Dec 2023 23:45:58 -0800
Subject: [PATCH 01/88] asynchronous metrics - total primary key bytes in memory
---
 .../ServerAsynchronousMetrics.cpp | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp
index 84d31bae13f..d000ecd684e 100644
--- a/src/Interpreters/ServerAsynchronousMetrics.cpp
+++ b/src/Interpreters/ServerAsynchronousMetrics.cpp
@@ -249,6 +249,9 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
     size_t total_number_of_rows_system = 0;
     size_t total_number_of_parts_system = 0;
 
+    size_t total_primary_key_bytes_memory = 0;
+    size_t total_primary_key_bytes_memory_allocated = 0;
+
     for (const auto & db : databases)
     {
         /// Check if database can contain MergeTree tables
@@ -287,6 +290,17 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
                     total_number_of_rows_system += rows;
                     total_number_of_parts_system += parts;
                 }
+
+                // fetch the list of all parts regardless of their state
+                auto all_parts = table_merge_tree->getAllDataPartsVector();
+
+                for (size_t part_number = 0; part_number < all_parts.size(); ++part_number)
+                {
+                    const auto & part = all_parts[part_number];
+
+                    total_primary_key_bytes_memory += part->getIndexSizeInBytes();
+                    total_primary_key_bytes_memory_allocated += part->getIndexSizeInAllocatedBytes();
+                }
             }
 
             if (StorageReplicatedMergeTree * table_replicated_merge_tree = typeid_cast<StorageReplicatedMergeTree *>(table.get()))
@@ -341,11 +355,14 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values
     new_values["TotalPartsOfMergeTreeTables"] = { total_number_of_parts, "Total amount of data parts in all tables of MergeTree family."
         " Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key." };
 
-    new_values["NumberOfTablesSystem"] = { total_number_of_tables_system, "Total number of tables in the system database on the server stored in tables of MergeTree family."};
+    new_values["NumberOfTablesSystem"] = { total_number_of_tables_system, "Total number of tables in the system database on the server stored in tables of MergeTree family." };
 
     new_values["TotalBytesOfMergeTreeTablesSystem"] = { total_number_of_bytes_system, "Total amount of bytes (compressed, including data and indices) stored in tables of MergeTree family in the system database." };
     new_values["TotalRowsOfMergeTreeTablesSystem"] = { total_number_of_rows_system, "Total amount of rows (records) stored in tables of MergeTree family in the system database." };
     new_values["TotalPartsOfMergeTreeTablesSystem"] = { total_number_of_parts_system, "Total amount of data parts in tables of MergeTree family in the system database." };
+
+    new_values["TotalPrimaryKeyBytesInMemory"] = { total_primary_key_bytes_memory, "The total amount of memory (in bytes) used by primary key values." };
+    new_values["TotalPrimaryKeyBytesInMemoryAllocated"] = { total_primary_key_bytes_memory_allocated, "The total amount of memory (in bytes) reserved for primary key values."
}; } #if USE_NURAFT From 41b2b63ceefa83fc2e7fb89fff6fa4641b2443a4 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Tue, 5 Dec 2023 17:47:35 -0800 Subject: [PATCH 02/88] add docs --- docs/en/operations/system-tables/asynchronous_metrics.md | 8 ++++++++ utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 2 files changed, 10 insertions(+) diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index e46b495239c..178506a7e4d 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -547,6 +547,14 @@ Total amount of bytes (compressed, including data and indices) stored in all tab Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and it may indicate unreasonable choice of the partition key. +### TotalPrimaryKeyBytesInMemory + +The total amount of memory (in bytes) used by primary key values. + +### TotalPrimaryKeyBytesInMemoryAllocated + +The total amount of memory (in bytes) reserved for primary key values. + ### TotalRowsOfMergeTreeTables Total amount of rows (records) stored in all tables of MergeTree family. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 6eca291279c..eae691d94c5 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -700,6 +700,8 @@ PrettySpaceMonoBlock PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock Prewhere +TotalPrimaryKeyBytesInMemory +TotalPrimaryKeyBytesInMemoryAllocated PrivateKeyPassphraseHandler ProfileEvents Profiler From 6125c1692b6864c64f59ccd83606ab0551b757db Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 6 Dec 2023 08:40:09 -0800 Subject: [PATCH 03/88] only account for active parts --- src/Interpreters/ServerAsynchronousMetrics.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index d000ecd684e..5bca16244c7 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -291,8 +291,8 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values total_number_of_parts_system += parts; } - // fetch the list of all parts regardless of their state - auto all_parts = table_merge_tree->getAllDataPartsVector(); + // only fetch the parts which are in active state + auto all_parts = table_merge_tree->getDataPartsVectorForInternalUsage(); for (size_t part_number = 0; part_number < all_parts.size(); ++part_number) { From 029a0c0b773ed3cff8dd7a31a8d569953bb8fb18 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Wed, 6 Dec 2023 12:30:39 -0800 Subject: [PATCH 04/88] fix clang-tidy --- src/Interpreters/ServerAsynchronousMetrics.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 5bca16244c7..3d1e1782e6e 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -294,10 +294,8 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values // only fetch the parts which are in active state auto all_parts = 
table_merge_tree->getDataPartsVectorForInternalUsage(); - for (size_t part_number = 0; part_number < all_parts.size(); ++part_number) + for (const auto & part : all_parts) { - const auto & part = all_parts[part_number]; - total_primary_key_bytes_memory += part->getIndexSizeInBytes(); total_primary_key_bytes_memory_allocated += part->getIndexSizeInAllocatedBytes(); } From 32816d7e7ee1f424b42f7692652e1cb62555c0a8 Mon Sep 17 00:00:00 2001 From: Pengyuan Bian Date: Mon, 18 Dec 2023 04:07:54 +0000 Subject: [PATCH 05/88] Add a setting to specify s3 disk is read only. --- src/Core/Settings.h | 1 + src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 7 ++++++- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 ++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 69efedf5d3e..6028fe2c1cf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -108,6 +108,7 @@ class IColumn; M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. This only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \ + M(Bool, s3_read_only, false, "Whether the s3 disk is read only or not. This is useful when creating a read only table with s3_plain type s3 disk", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \ M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. 
The value can be overwritten per table with table settings", 0) \
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
index fdf82430812..c8b3aeaca28 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h
@@ -22,11 +22,13 @@ struct S3ObjectStorageSettings
         const S3Settings::RequestSettings & request_settings_,
         uint64_t min_bytes_for_seek_,
         int32_t list_object_keys_size_,
-        int32_t objects_chunk_size_to_delete_)
+        int32_t objects_chunk_size_to_delete_,
+        bool read_only_)
         : request_settings(request_settings_)
         , min_bytes_for_seek(min_bytes_for_seek_)
         , list_object_keys_size(list_object_keys_size_)
         , objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
+        , read_only(read_only_)
     {}
 
     S3Settings::RequestSettings request_settings;
@@ -34,6 +36,7 @@ struct S3ObjectStorageSettings
     uint64_t min_bytes_for_seek;
     int32_t list_object_keys_size;
     int32_t objects_chunk_size_to_delete;
+    bool read_only;
 };
 
@@ -166,6 +169,8 @@ public:
 
     ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override;
 
+    bool isReadOnly() const override { return s3_settings.get()->read_only; }
+
 private:
     void setNewSettings(std::unique_ptr<S3ObjectStorageSettings> && s3_settings_);
 
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index 2ddde4021b3..358116ed262 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -34,7 +34,8 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
         request_settings,
         config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
         config.getInt(config_prefix + ".list_object_keys_size", 1000),
-        config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000));
+        config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
+        config.getBool(config_prefix + ".s3_read_only", false));
 }
 
 std::unique_ptr<S3::Client> getClient(

From 02311d1619128b8c07998eb369383a183f7ef85f Mon Sep 17 00:00:00 2001
From: Pengyuan Bian
Date: Mon, 18 Dec 2023 04:23:26 +0000
Subject: [PATCH 06/88] update.
---
 src/Core/Settings.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 6028fe2c1cf..69efedf5d3e 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -108,7 +108,6 @@ class IColumn;
     M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
     M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
     M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. This only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
-    M(Bool, s3_read_only, false, "Whether the s3 disk is read only or not. This is useful when creating a read only table with s3_plain type s3 disk", 0) \
     M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
     M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
    M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \

From 73f71cc9fc872941442d3615e5df044005ca24e7 Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy
Date: Mon, 18 Dec 2023 11:50:54 -0800
Subject: [PATCH 07/88] add a test
---
 .../__init__.py                              |  0
 .../asynchronous_metrics_update_period_s.xml |  3 +
 .../test.py                                  | 57 +++++++++++++++++++
 3 files changed, 60 insertions(+)
 create mode 100644 tests/integration/test_asynchronous_metrics_pk_bytes_fields/__init__.py
 create mode 100644 tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml
 create mode 100644 tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py

diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/__init__.py b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml
new file mode 100644
index 00000000000..47e88730482
--- /dev/null
+++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml
@@ -0,0 +1,3 @@
+<clickhouse>
+    <asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s>
+</clickhouse>
diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py
new file mode 100644
index 00000000000..23123daf805
--- /dev/null
+++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py
@@ -0,0 +1,57 @@
+import time
+
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+node1 = cluster.add_instance(
+    "node1",
+    with_zookeeper=True,
+    main_configs=["configs/asynchronous_metrics_update_period_s.xml"],
+)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        cluster.shutdown()
+
+
+def test_total_pk_bytes_in_memory_fields(started_cluster):
+    try:
+        cluster.start()
+        node1.query("SET log_queries = 1;")
+        node1.query("CREATE DATABASE replica;")
+        query_create = """CREATE TABLE replica.test
+        (
+            id Int64,
+            event_time DateTime
+        )
+        Engine=MergeTree()
+        PARTITION BY toYYYYMMDD(event_time)
+        ORDER BY id;"""
+        time.sleep(2)
+        node1.query(query_create)
+        node1.query("""INSERT INTO replica.test VALUES (1, now())""")
+        node1.query("SYSTEM FLUSH LOGS;")
+
+        # query system.asynchronous_metrics
+        test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric='TotalPrimaryKeyBytesInMemory';"
+        assert "ok\n" in node1.query(test_query)
+
+        test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric='TotalPrimaryKeyBytesInMemoryAllocated';"
+        assert "ok\n" in node1.query(test_query)
+
+        # query system.asynchronous_metric_log
+        test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric='TotalPrimaryKeyBytesInMemory';"
+        assert "ok\n" in node1.query(test_query)
+
+        test_query = "SELECT count() > 0 ?
'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric='TotalPrimaryKeyBytesInMemoryAllocated';" + assert "ok\n" in node1.query(test_query) + + finally: + cluster.shutdown() From 6273c5920a897a5781e42b1479c3243c3a196ea3 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Tue, 19 Dec 2023 08:26:54 -0800 Subject: [PATCH 08/88] fix tests --- .../asynchronous_metrics_update_period_s.xml | 1 + .../test.py | 52 ++++++++----------- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml index 47e88730482..0a56d734805 100644 --- a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml +++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml @@ -1,3 +1,4 @@ 1 + 1 diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py index 23123daf805..52beedfe8e8 100644 --- a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py +++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py @@ -22,36 +22,30 @@ def started_cluster(): def test_total_pk_bytes_in_memory_fields(started_cluster): - try: - cluster.start() - node1.query("SET log_queries = 1;") - node1.query("CREATE DATABASE replica;") - query_create = """CREATE TABLE replica.test - ( - id Int64, - event_time DateTime - ) - Engine=MergeTree() - PARTITION BY toYYYYMMDD(event_time) - ORDER BY id;""" - time.sleep(2) - node1.query(query_create) - node1.query("""INSERT INTO replica.test VALUES (1, now())""") - node1.query("SYSTEM FLUSH LOGS;") + cluster.start() + node1.query("SET log_queries = 1;") + query_create = """CREATE TABLE test + ( + id Int64, + event_time DateTime + ) + Engine=MergeTree() + PARTITION BY toYYYYMMDD(event_time) + ORDER BY id;""" + node1.query(query_create) + node1.query("""INSERT INTO test VALUES (1, now())""") + node1.query("SYSTEM FLUSH LOGS;") - # query system.asynchronous_metrics - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric='TotalPrimaryKeyBytesInMemory';" - assert "ok\n" in node1.query(test_query) + # query system.asynchronous_metrics + test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemory';" + assert "ok\n" in node1.query(test_query) - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric='TotalPrimaryKeyBytesInMemoryAllocated';" - assert "ok\n" in node1.query(test_query) + test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';" + assert "ok\n" in node1.query(test_query) - # query system.asynchronous_metric_log - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric='TotalPrimaryKeyBytesInMemory';" - assert "ok\n" in node1.query(test_query) + # query system.asynchronous_metric_log + test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric = 'TotalPrimaryKeyBytesInMemory';" + assert "ok\n" in node1.query(test_query) - test_query = "SELECT count() > 0 ? 
'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric='TotalPrimaryKeyBytesInMemoryAllocated';" - assert "ok\n" in node1.query(test_query) - - finally: - cluster.shutdown() + test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';" + assert "ok\n" in node1.query(test_query) From ecd2843b4711100b7dd185586138d4ced38912b8 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Tue, 19 Dec 2023 08:35:08 -0800 Subject: [PATCH 09/88] add a clarification to docs --- docs/en/operations/system-tables/asynchronous_metrics.md | 4 ++-- src/Interpreters/ServerAsynchronousMetrics.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 178506a7e4d..a128815961e 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -549,11 +549,11 @@ Total amount of data parts in all tables of MergeTree family. Numbers larger tha ### TotalPrimaryKeyBytesInMemory -The total amount of memory (in bytes) used by primary key values. +The total amount of memory (in bytes) used by primary key values (only takes active parts into account). ### TotalPrimaryKeyBytesInMemoryAllocated -The total amount of memory (in bytes) reserved for primary key values. +The total amount of memory (in bytes) reserved for primary key values (only takes active parts into account). ### TotalRowsOfMergeTreeTables diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 52aaf89fc12..31d4a4e51a4 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -365,8 +365,8 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values new_values["TotalRowsOfMergeTreeTablesSystem"] = { total_number_of_rows_system, "Total amount of rows (records) stored in tables of MergeTree family in the system database." }; new_values["TotalPartsOfMergeTreeTablesSystem"] = { total_number_of_parts_system, "Total amount of data parts in tables of MergeTree family in the system database." }; - new_values["TotalPrimaryKeyBytesInMemory"] = { total_primary_key_bytes_memory, "The total amount of memory (in bytes) used by primary key values." }; - new_values["TotalPrimaryKeyBytesInMemoryAllocated"] = { total_primary_key_bytes_memory_allocated, "The total amount of memory (in bytes) reserved for primary key values." }; + new_values["TotalPrimaryKeyBytesInMemory"] = { total_primary_key_bytes_memory, "The total amount of memory (in bytes) used by primary key values (only takes active parts into account)." }; + new_values["TotalPrimaryKeyBytesInMemoryAllocated"] = { total_primary_key_bytes_memory_allocated, "The total amount of memory (in bytes) reserved for primary key values (only takes active parts into account)." 
}; } #if USE_NURAFT From 929bc01281639761e16c302ffa21a89d351af8ef Mon Sep 17 00:00:00 2001 From: Bharat Nallan Chakravarthy Date: Thu, 21 Dec 2023 11:20:16 -0800 Subject: [PATCH 10/88] better test with retries --- .../asynchronous_metrics_update_period_s.xml | 1 - .../test.py | 130 ++++++++++++++---- 2 files changed, 105 insertions(+), 26 deletions(-) diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml index 0a56d734805..47e88730482 100644 --- a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml +++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/configs/asynchronous_metrics_update_period_s.xml @@ -1,4 +1,3 @@ 1 - 1 diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py index 52beedfe8e8..be8e4aa150f 100644 --- a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py +++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py @@ -1,12 +1,10 @@ -import time - import pytest +import time from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance( - "node1", - with_zookeeper=True, +node = cluster.add_instance( + "node", main_configs=["configs/asynchronous_metrics_update_period_s.xml"], ) @@ -21,31 +19,113 @@ def started_cluster(): cluster.shutdown() +def greater(a, b): + return b > a + + +def lesser(a, b): + return b < a + + +def query_until_condition(a, b, condition, retries=20, timeout=60, delay=0.5): + """ + + :param a: could be an input lambda that returns an int or just an int + :param b: could be an input lambda that returns an int or just an int + :param condition: lambda that returns a boolean after comparing a and b + :param retries: number of times to retry until the condition is met + :param timeout: time in seconds after which stop retrying + :param delay: time in seconds between each retry + :return: values of a and b (value post evaluation if lambda) + """ + retries_done = 0 + start_time = time.time() + while True: + res1 = a() if callable(a) else a + res2 = b() if callable(b) else b + if condition(res1, res2): + return res1, res2 + retries_done += 1 + if retries_done >= retries or (time.time() - start_time) > timeout: + return res1, res2 + time.sleep(delay) + + def test_total_pk_bytes_in_memory_fields(started_cluster): - cluster.start() - node1.query("SET log_queries = 1;") - query_create = """CREATE TABLE test + query_create = """CREATE TABLE test_pk_bytes ( - id Int64, - event_time DateTime + a UInt64, + b UInt64 ) Engine=MergeTree() - PARTITION BY toYYYYMMDD(event_time) - ORDER BY id;""" - node1.query(query_create) - node1.query("""INSERT INTO test VALUES (1, now())""") - node1.query("SYSTEM FLUSH LOGS;") + ORDER BY a SETTINGS index_granularity=1""" + node.query(query_create) - # query system.asynchronous_metrics - test_query = "SELECT count() > 0 ? 
'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemory';" - assert "ok\n" in node1.query(test_query) + query_pk_bytes = "SELECT value FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemory';" + query_pk_bytes_allocated = """SELECT value FROM system.asynchronous_metrics + WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';""" - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metrics WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';" - assert "ok\n" in node1.query(test_query) + # query for metrics before inserting anything into the table + pk_bytes_before = int(node.query(query_pk_bytes).strip()) + pk_bytes_allocated_before = int(node.query(query_pk_bytes_allocated).strip()) - # query system.asynchronous_metric_log - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric = 'TotalPrimaryKeyBytesInMemory';" - assert "ok\n" in node1.query(test_query) + # insert data into the table and select + node.query( + """INSERT INTO test_pk_bytes SELECT number + 20, number * 20 from numbers(1000000)""" + ) - test_query = "SELECT count() > 0 ? 'ok' : 'fail' FROM system.asynchronous_metric_log WHERE metric = 'TotalPrimaryKeyBytesInMemoryAllocated';" - assert "ok\n" in node1.query(test_query) + node.query("""SELECT * FROM test_pk_bytes where a > 1000000""") + + # functions to query primary key bytes used and allocated in memory + def res_pk_bytes(): + return int(node.query(query_pk_bytes).strip()) + + def res_pk_bytes_allocated(): + return int(node.query(query_pk_bytes_allocated).strip()) + + # query again after data insertion (make a reasonable amount of retries) + # metrics should be greater after inserting data + pk_bytes_before, pk_bytes_after = query_until_condition( + pk_bytes_before, res_pk_bytes, condition=greater + ) + assert pk_bytes_after > pk_bytes_before + + pk_bytes_allocated_before, pk_bytes_allocated_after = query_until_condition( + pk_bytes_allocated_before, res_pk_bytes_allocated, condition=greater + ) + assert pk_bytes_allocated_after > pk_bytes_allocated_before + + # insert some more data + node.query( + """INSERT INTO test_pk_bytes SELECT number + 100, number * 200 from numbers(1000000)""" + ) + node.query("""SELECT * FROM test_pk_bytes""") + + # query again and compare the metrics. + # metrics should be greater after inserting more data + pk_bytes_after, pk_bytes_after_2 = query_until_condition( + pk_bytes_after, res_pk_bytes, condition=greater + ) + assert pk_bytes_after_2 > pk_bytes_after + + pk_bytes_allocated_after, pk_bytes_allocated_after_2 = query_until_condition( + pk_bytes_allocated_after, res_pk_bytes_allocated, condition=greater + ) + assert pk_bytes_allocated_after_2 > pk_bytes_allocated_after + + # alter the table to drop some data + node.query( + "ALTER table test_pk_bytes DELETE where a < 1000000 SETTINGS mutations_sync=1;" + ) + + # query again and compare the metrics. 
+    # metrics should be lesser after dropping some data
+    before_drop, after_drop = query_until_condition(
+        pk_bytes_after_2, res_pk_bytes, condition=lesser
+    )
+    assert before_drop > after_drop
+
+    before_drop, after_drop = query_until_condition(
+        pk_bytes_allocated_after_2, res_pk_bytes_allocated, condition=lesser
+    )
+    assert before_drop > after_drop

From ee199877d666503a9858c9f41b80fb83e7c31a18 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 22 Dec 2023 07:27:48 +0300
Subject: [PATCH 11/88] Update diskSettings.cpp
---
 src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index 358116ed262..d1137f7620a 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -35,7 +35,7 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
         config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
         config.getInt(config_prefix + ".list_object_keys_size", 1000),
         config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
-        config.getBool(config_prefix + ".s3_read_only", false));
+        config.getBool(config_prefix + ".readonly", false));
 }
 
 std::unique_ptr<S3::Client> getClient(

From 2be2486e947cd249f52a023341715712c4f1e9e6 Mon Sep 17 00:00:00 2001
From: Dani Pozo
Date: Fri, 22 Dec 2023 17:28:43 +0100
Subject: [PATCH 12/88] Remove retryStrategy assignments overwritten in ClientFactory::create()
---
 src/Backups/BackupIO_S3.cpp                  | 1 -
 src/Disks/ObjectStorages/S3/diskSettings.cpp | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
index 74195a93072..9681887aa8b 100644
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@@ -68,7 +68,6 @@ namespace
         client_configuration.connectTimeoutMs = 10 * 1000;
         /// Requests in backups can be extremely long, set to one hour
         client_configuration.requestTimeoutMs = 60 * 60 * 1000;
-        client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(request_settings.retry_attempts);
 
         return S3::ClientFactory::instance().create(
             client_configuration,
diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp
index 2ddde4021b3..b58131fd432 100644
--- a/src/Disks/ObjectStorages/S3/diskSettings.cpp
+++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp
@@ -92,10 +92,6 @@ std::unique_ptr<S3::Client> getClient(
     HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config);
     S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config);
 
-    client_configuration.retryStrategy
-        = std::make_shared<Aws::Client::DefaultRetryStrategy>(
-            config.getUInt64(config_prefix + ".retry_attempts", settings.request_settings.retry_attempts));
-
     return S3::ClientFactory::instance().create(
         client_configuration,
         uri.is_virtual_hosted_style,

From d2f0fe28dc485ccb6c8f0507dabbf56b7bdacbcb Mon Sep 17 00:00:00 2001
From: Bharat Nallan Chakravarthy
Date: Fri, 22 Dec 2023 08:18:15 -0800
Subject: [PATCH 13/88] fix test query
---
 .../test_asynchronous_metrics_pk_bytes_fields/test.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py
index be8e4aa150f..154048df35c 100644
--- a/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py
+++ b/tests/integration/test_asynchronous_metrics_pk_bytes_fields/test.py
@@ -113,10 +113,8 @@ def
test_total_pk_bytes_in_memory_fields(started_cluster): ) assert pk_bytes_allocated_after_2 > pk_bytes_allocated_after - # alter the table to drop some data - node.query( - "ALTER table test_pk_bytes DELETE where a < 1000000 SETTINGS mutations_sync=1;" - ) + # drop all the data + node.query("TRUNCATE table test_pk_bytes;") # query again and compare the metrics. # metrics should be lesser after dropping some data @@ -129,3 +127,6 @@ def test_total_pk_bytes_in_memory_fields(started_cluster): pk_bytes_allocated_after_2, res_pk_bytes_allocated, condition=lesser ) assert before_drop > after_drop + + # finally drop the table + node.query("DROP table test_pk_bytes;") From 269e9706fb9fe82d1f2b44167326faa4e76f897e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 21 Dec 2023 14:30:09 +0300 Subject: [PATCH 14/88] MergeTree FINAL extract non intersecting parts ranges --- src/Processors/QueryPlan/PartsSplitter.cpp | 564 +++++++++++++++--- src/Processors/QueryPlan/PartsSplitter.h | 22 +- .../QueryPlan/ReadFromMergeTree.cpp | 86 +-- ...inal_split_ranges_by_primary_key.reference | 85 +++ ...tree_final_split_ranges_by_primary_key.sql | 34 ++ 5 files changed, 627 insertions(+), 164 deletions(-) create mode 100644 tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.reference create mode 100644 tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 8bf877cf8b9..6f49bcce25c 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -30,6 +30,23 @@ std::string toString(const Values & value) return fmt::format("({})", fmt::join(value, ", ")); } +int compareValues(const Values & lhs, const Values & rhs) +{ + chassert(lhs.size() == rhs.size()); + + for (size_t i = 0; i < lhs.size(); ++i) + { + if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i])) + return -1; + + if (!applyVisitor(FieldVisitorAccurateEquals(), lhs[i], rhs[i])) + return 1; + } + + return 0; +} + + /// Adaptor to access PK values from index. 
 class IndexAccess
 {
@@ -49,6 +66,58 @@ public:
         return values;
     }
 
+    std::optional<size_t> findRightmostMarkLessThanValueInRange(size_t part_index, Values value, size_t range_begin, size_t range_end) const
+    {
+        size_t left = range_begin;
+        size_t right = range_end;
+
+        while (left < right)
+        {
+            size_t middle = left + (right - left) / 2;
+            int compare_result = compareValues(getValue(part_index, middle), value);
+            if (compare_result != -1)
+                right = middle;
+            else
+                left = middle + 1;
+        }
+
+        if (right == range_begin)
+            return {};
+
+        return right - 1;
+    }
+
+    std::optional<size_t> findRightmostMarkLessThanValueInRange(size_t part_index, Values value, MarkRange mark_range) const
+    {
+        return findRightmostMarkLessThanValueInRange(part_index, value, mark_range.begin, mark_range.end);
+    }
+
+    std::optional<size_t> findLeftmostMarkGreaterThanValueInRange(size_t part_index, Values value, size_t range_begin, size_t range_end) const
+    {
+        size_t left = range_begin;
+        size_t right = range_end;
+
+        while (left < right)
+        {
+            size_t middle = left + (right - left) / 2;
+            int compare_result = compareValues(getValue(part_index, middle), value);
+            if (compare_result != 1)
+                left = middle + 1;
+            else
+                right = middle;
+        }
+
+        if (left == range_end)
+            return {};
+
+        return left;
+    }
+
+    std::optional<size_t> findLeftmostMarkGreaterThanValueInRange(size_t part_index, Values value, MarkRange mark_range) const
+    {
+        return findLeftmostMarkGreaterThanValueInRange(part_index, value, mark_range.begin, mark_range.end);
+    }
+
     size_t getMarkRows(size_t part_idx, size_t mark) const { return parts[part_idx].data_part->index_granularity.getMarkRows(mark); }
 
     size_t getTotalRowCount() const
@@ -63,67 +132,367 @@ private:
     const RangesInDataParts & parts;
 };
 
-
-/// Splits parts into layers, each layer will contain parts subranges with PK values from its own range.
-/// Will try to produce exactly max_layer layers but may return less if data is distributed in not a very parallelizable way.
-std::pair<std::vector<Values>, std::vector<RangesInDataParts>> split(RangesInDataParts parts, size_t max_layers)
+class RangesInDataPartsBuilder
 {
+public:
+    explicit RangesInDataPartsBuilder(const RangesInDataParts & initial_ranges_in_data_parts_) : initial_ranges_in_data_parts(initial_ranges_in_data_parts_) { }
+
+    void addRange(size_t part_index, MarkRange mark_range)
+    {
+        auto [it, inserted] = part_index_to_current_ranges_in_data_parts_index.emplace(part_index, ranges_in_data_parts.size());
+
+        if (inserted)
+        {
+            ranges_in_data_parts.emplace_back(
+                initial_ranges_in_data_parts[part_index].data_part,
+                initial_ranges_in_data_parts[part_index].alter_conversions,
+                initial_ranges_in_data_parts[part_index].part_index_in_query,
+                MarkRanges{mark_range});
+            part_index_to_initial_ranges_in_data_parts_index[it->second] = part_index;
+            return;
+        }
+
+        ranges_in_data_parts[it->second].ranges.push_back(mark_range);
+    }
+
+    RangesInDataParts & getCurrentRangesInDataParts()
+    {
+        return ranges_in_data_parts;
+    }
+
+    size_t mapPartIndexToInitialPartIndex(size_t part_index) const
+    {
+        return part_index_to_initial_ranges_in_data_parts_index.at(part_index);
+    }
+private:
+    std::unordered_map<size_t, size_t> part_index_to_current_ranges_in_data_parts_index;
+    std::unordered_map<size_t, size_t> part_index_to_initial_ranges_in_data_parts_index;
+    RangesInDataParts ranges_in_data_parts;
+    const RangesInDataParts & initial_ranges_in_data_parts;
+};
+
+struct PartsRangesIterator
+{
+    enum class EventType : uint8_t
+    {
+        RangeStart = 0,
+        RangeEnd,
+    };
+
+    [[maybe_unused]] bool operator<(const PartsRangesIterator & other) const
+    {
+        int compare_result = compareValues(value, other.value);
+        if (compare_result == -1)
+            return true;
+        else if (compare_result == 1)
+            return false;
+
+        // RangeStart event always before RangeEnd event
+        if (event != other.event)
+            return event < other.event;
+
+        /// Within the same part we should process events in order of mark numbers,
+        /// because they are already ordered by value and range ends have greater mark numbers than the beginnings.
+        /// Otherwise we could get invalid ranges with the right bound that is less than the left bound.
+        const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end;
+        const auto other_ev_mark = other.event == EventType::RangeStart ? other.range.begin : other.range.end;
+
+        if (ev_mark == other_ev_mark)
+            return part_index < other.part_index;
+
+        return ev_mark < other_ev_mark;
+    }
+
+    [[maybe_unused]] bool operator==(const PartsRangesIterator & other) const
+    {
+        if (value.size() != other.value.size())
+            return false;
+
+        for (size_t i = 0; i < value.size(); ++i)
+            if (!applyVisitor(FieldVisitorAccurateEquals(), value[i], other.value[i]))
+                return false;
+
+        return range == other.range && part_index == other.part_index && event == other.event;
+    }
+
+    [[maybe_unused]] bool operator>(const PartsRangesIterator & other) const
+    {
+        if (operator<(other) || operator==(other))
+            return false;
+
+        return true;
+    }
+
+    Values value;
+    MarkRange range;
+    size_t part_index;
+    EventType event;
+};
+
+struct SplitResult
+{
+    RangesInDataParts non_intersecting_parts_ranges;
+    std::vector<Values> borders;
+    std::vector<RangesInDataParts> layers;
+};
+
+SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers)
+{
+    /** Split ranges in data parts into intersecting ranges in data parts and non intersecting ranges in data parts.
+      *
+      * For each marks range we will create 2 events (RangeStart, RangeEnd), add these events into array and sort them by primary key index
+      * value at this event.
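+      *
+      * For example (illustrative values, assuming a single-column primary key): if part A has a marks range spanning
+      * key values [10, 30] and part B has one spanning [25, 40], the sorted event sequence is
+      * A.RangeStart(10), B.RangeStart(25), A.RangeEnd(30), B.RangeEnd(40).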
+      *
+      * After that we will scan sorted events and maintain the set of currently intersecting parts ranges.
+      * If only 1 part range is currently intersecting, then for each event (RangeStart, RangeEnd) we can extract a non intersecting range
+      * from that single part range.
+      *
+      * There can be 4 possible cases:
+      *
+      * 1. RangeStart after RangeStart:
+      *
+      * Example:
+      *
+      * range 1 [----                ...
+      * range 2    [(value_1)        ...
+      *
+      * In this scenario we can extract a non intersecting part of range 1. This non intersecting part will have the start
+      * of range 1 and end at the rightmost mark from range 1 that contains a value less than value_1.
+      *
+      * 2. RangeStart after RangeEnd:
+      *
+      * Example:
+      *
+      * range 1 [            ----    ...
+      * range 2 [ (value_1)]
+      * range 3 [(value_2)           ...
+      *
+      * In this case we can extract a non intersecting part of range 1. This non intersecting part will have its start at
+      * the leftmost mark from range 1 that contains a value greater than value_1 and end at the rightmost mark from range 1
+      * that contains a value less than value_2.
+      *
+      * 3. RangeEnd after RangeStart:
+      *
+      * Example:
+      *
+      * range 1 [----]
+      *
+      * In this case we can extract range 1 as non intersecting.
+      *
+      * 4. RangeEnd after RangeEnd:
+      *
+      * Example:
+      *
+      * range 1 [ ...        ----]
+      * range 2 [ ...   (value_1)]
+      *
+      * In this case we can extract a non intersecting part of range 1. This non intersecting part will have its start at
+      * the leftmost mark from range 1 that contains a value greater than value_1 and end with the end of range 1.
+      *
+      * Additional details:
+      *
+      * 1. If part level is 0, we must process all ranges from this part, because they can contain duplicate primary keys.
+      * 2. If a non intersecting range is small, it is better not to add it to the non intersecting ranges, to avoid expensive seeks.
+      */
+
+    IndexAccess index_access(ranges_in_data_parts);
+    std::vector<PartsRangesIterator> parts_ranges;
+
+    for (size_t part_index = 0; part_index < ranges_in_data_parts.size(); ++part_index)
+    {
+        for (const auto & range : ranges_in_data_parts[part_index].ranges)
+        {
+            const auto & index_granularity = ranges_in_data_parts[part_index].data_part->index_granularity;
+            parts_ranges.push_back(
+                {index_access.getValue(part_index, range.begin), range, part_index, PartsRangesIterator::EventType::RangeStart});
+
+            const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount();
+            if (!value_is_defined_at_end_mark)
+                continue;
+
+            parts_ranges.push_back(
+                {index_access.getValue(part_index, range.end), range, part_index, PartsRangesIterator::EventType::RangeEnd});
+        }
+    }
+
+    std::sort(parts_ranges.begin(), parts_ranges.end());
+
+    RangesInDataPartsBuilder intersecting_ranges_in_data_parts_builder(ranges_in_data_parts);
+    RangesInDataPartsBuilder non_intersecting_ranges_in_data_parts_builder(ranges_in_data_parts);
+
+    static constexpr size_t min_number_of_marks_for_non_intersecting_range = 2;
+
+    auto add_non_intersecting_range = [&](size_t part_index, MarkRange mark_range)
+    {
+        non_intersecting_ranges_in_data_parts_builder.addRange(part_index, mark_range);
+    };
+
+    auto add_intersecting_range = [&](size_t part_index, MarkRange mark_range)
+    {
+        intersecting_ranges_in_data_parts_builder.addRange(part_index, mark_range);
+    };
+
+    std::unordered_map<size_t, MarkRange> part_index_start_to_range;
+
+    chassert(parts_ranges.size() > 1);
+    chassert(parts_ranges[0].event == PartsRangesIterator::EventType::RangeStart);
+    part_index_start_to_range[parts_ranges[0].part_index] = parts_ranges[0].range;
+
+    size_t parts_ranges_size = parts_ranges.size();
+    for (size_t i = 1; i < parts_ranges_size; ++i)
+    {
+        auto & previous_part_range = parts_ranges[i - 1];
+        auto & current_part_range = parts_ranges[i];
+        size_t intersecting_parts = part_index_start_to_range.size();
+        bool range_start = current_part_range.event == PartsRangesIterator::EventType::RangeStart;
+
+        if (range_start)
+        {
+            auto [it, inserted] = part_index_start_to_range.emplace(current_part_range.part_index, current_part_range.range);
+            chassert(inserted);
+
+            if (intersecting_parts != 1)
+                continue;
+
+            if (previous_part_range.event == PartsRangesIterator::EventType::RangeStart)
+            {
+                /// If part level is 0, we must process whole previous part because it can contain duplicate primary keys
+                if (ranges_in_data_parts[previous_part_range.part_index].data_part->info.level == 0)
+                    continue;
+
+                /// Case 1 Range Start after Range Start
+                size_t begin = previous_part_range.range.begin;
+                std::optional<size_t> end_optional = index_access.findRightmostMarkLessThanValueInRange(previous_part_range.part_index,
+                    current_part_range.value,
+                    previous_part_range.range);
+
+                if (!end_optional)
+                    continue;
+
+                size_t end = *end_optional;
+
+                if (end - begin >= min_number_of_marks_for_non_intersecting_range)
+                {
+                    part_index_start_to_range[previous_part_range.part_index].begin = end;
+                    add_non_intersecting_range(previous_part_range.part_index, MarkRange{begin, end});
+                }
+
+                continue;
+            }
+
+            auto other_interval_it = part_index_start_to_range.begin();
+            for (; other_interval_it != part_index_start_to_range.end(); ++other_interval_it)
+            {
+                if (other_interval_it != it)
+                    break;
+            }
+
+            chassert(other_interval_it != part_index_start_to_range.end());
+            size_t other_interval_part_index = other_interval_it->first;
+            MarkRange other_interval_range = other_interval_it->second;
+
+            /// If part level is 0, we must process whole other intersecting part because it can contain duplicate primary keys
+            if (ranges_in_data_parts[other_interval_part_index].data_part->info.level == 0)
+                continue;
+
+            /// Case 2 Range Start after Range End
+            std::optional<size_t> begin_optional = index_access.findLeftmostMarkGreaterThanValueInRange(other_interval_part_index,
+                previous_part_range.value,
+                other_interval_range);
+            if (!begin_optional)
+                continue;
+
+            std::optional<size_t> end_optional = index_access.findRightmostMarkLessThanValueInRange(other_interval_part_index,
+                current_part_range.value,
+                other_interval_range);
+            if (!end_optional)
+                continue;
+
+            size_t begin = *begin_optional;
+            size_t end = *end_optional;
+
+            if (end - begin >= min_number_of_marks_for_non_intersecting_range)
+            {
+                other_interval_it->second.begin = end;
+                add_intersecting_range(other_interval_part_index, MarkRange{other_interval_range.begin, begin});
+                add_non_intersecting_range(other_interval_part_index, MarkRange{begin, end});
+            }
+            continue;
+        }
+
+        chassert(current_part_range.event == PartsRangesIterator::EventType::RangeEnd);
+
+        /** If there is more than 1 part range that we are currently processing,
+          * that means that this part range is intersecting with another range.
+          *
+          * If part level is 0, we must process whole part because it can contain duplicate primary keys.
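+          * Level 0 parts are produced directly by inserts and have not been merged yet, so even a single such part
+          * may contain several rows with the same primary key; none of its ranges may bypass the merging transform.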
+          */
+        if (intersecting_parts != 1 || ranges_in_data_parts[current_part_range.part_index].data_part->info.level == 0)
+        {
+            add_intersecting_range(current_part_range.part_index, part_index_start_to_range[current_part_range.part_index]);
+            part_index_start_to_range.erase(current_part_range.part_index);
+            continue;
+        }
+
+        if (previous_part_range.event == PartsRangesIterator::EventType::RangeStart)
+        {
+            chassert(current_part_range.part_index == previous_part_range.part_index);
+            chassert(current_part_range.range == previous_part_range.range);
+
+            /// Case 3 Range End after Range Start
+            non_intersecting_ranges_in_data_parts_builder.addRange(current_part_range.part_index, current_part_range.range);
+            part_index_start_to_range.erase(current_part_range.part_index);
+            continue;
+        }
+
+        chassert(previous_part_range.event == PartsRangesIterator::EventType::RangeEnd);
+        chassert(previous_part_range.part_index != current_part_range.part_index);
+
+        /// Case 4 Range End after Range End
+        std::optional<size_t> begin_optional = index_access.findLeftmostMarkGreaterThanValueInRange(current_part_range.part_index,
+            previous_part_range.value,
+            current_part_range.range);
+        size_t end = current_part_range.range.end;
+
+        if (begin_optional && end - *begin_optional >= min_number_of_marks_for_non_intersecting_range)
+        {
+            size_t begin = *begin_optional;
+            add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range.part_index].begin, begin});
+            add_non_intersecting_range(current_part_range.part_index, MarkRange{begin, end});
+        }
+        else
+        {
+            add_intersecting_range(current_part_range.part_index, MarkRange{part_index_start_to_range[current_part_range.part_index].begin, end});
+        }
+
+        part_index_start_to_range.erase(current_part_range.part_index);
+    }
+
+    auto & non_intersecting_ranges_in_data_parts = non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts();
+    auto & intersecting_ranges_in_data_parts = intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts();
+
     // We will advance the iterator pointing to the mark with the smallest PK value until
     // there will be not less than rows_per_layer rows in the current layer (roughly speaking).
     // Then we choose the last observed value as the new border, so the current layer will consists
     // of granules with values greater than the previous mark and less or equal than the new border.
-    struct PartsRangesIterator
+    std::priority_queue<PartsRangesIterator, std::vector<PartsRangesIterator>, std::greater<>> parts_ranges_queue;
+    for (size_t part_index = 0; part_index < intersecting_ranges_in_data_parts.size(); ++part_index)
     {
-        struct MarkRangeWithPartIdx : MarkRange
-        {
-            size_t part_idx;
-        };
-
-        enum class EventType
-        {
-            RangeStart,
-            RangeEnd,
-        };
-
-        [[maybe_unused]] bool operator<(const PartsRangesIterator & other) const
-        {
-            // Accurate comparison of `value > other.value`
-            for (size_t i = 0; i < value.size(); ++i)
-            {
-                if (applyVisitor(FieldVisitorAccurateLess(), value[i], other.value[i]))
-                    return false;
-
-                if (!applyVisitor(FieldVisitorAccurateEquals(), value[i], other.value[i]))
-                    return true;
-            }
-
-            /// Within the same part we should process events in order of mark numbers,
-            /// because they already ordered by value and range ends have greater mark numbers than the beginnings.
-            /// Otherwise we could get invalid ranges with the right bound that is less than the left bound.
-            const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end;
-            const auto other_ev_mark = other.event == EventType::RangeStart ? other.range.begin : other.range.end;
-            return ev_mark > other_ev_mark;
-        }
-
-        Values value;
-        MarkRangeWithPartIdx range;
-        EventType event;
-    };
-
-    const auto index_access = std::make_unique<IndexAccess>(parts);
-    std::priority_queue<PartsRangesIterator> parts_ranges_queue;
-    for (size_t part_idx = 0; part_idx < parts.size(); ++part_idx)
-    {
-        for (const auto & range : parts[part_idx].ranges)
+        size_t initial_part_index = intersecting_ranges_in_data_parts_builder.mapPartIndexToInitialPartIndex(part_index);
+
+        for (const auto & range : intersecting_ranges_in_data_parts[part_index].ranges)
         {
+            const auto & index_granularity = intersecting_ranges_in_data_parts[part_index].data_part->index_granularity;
             parts_ranges_queue.push(
-                {index_access->getValue(part_idx, range.begin), {range, part_idx}, PartsRangesIterator::EventType::RangeStart});
-            const auto & index_granularity = parts[part_idx].data_part->index_granularity;
+                {index_access.getValue(initial_part_index, range.begin), range, initial_part_index, PartsRangesIterator::EventType::RangeStart});
+
             const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount();
-            if (value_is_defined_at_end_mark)
-                parts_ranges_queue.push(
-                    {index_access->getValue(part_idx, range.end), {range, part_idx}, PartsRangesIterator::EventType::RangeEnd});
+            if (!value_is_defined_at_end_mark)
+                continue;
+
+            parts_ranges_queue.push(
+                {index_access.getValue(initial_part_index, range.end), range, initial_part_index, PartsRangesIterator::EventType::RangeEnd});
         }
     }
 
@@ -136,7 +505,7 @@ std::pair<std::vector<Values>, std::vector<RangesInDat
     std::vector<Values> borders;
    std::vector<RangesInDataParts> result_layers;
 
-    const size_t rows_per_layer = std::max<size_t>(index_access->getTotalRowCount() / max_layers, 1);
+    const size_t rows_per_layer = std::max<size_t>(index_access.getTotalRowCount() / max_layers, 1);
 
     while (!parts_ranges_queue.empty())
     {
@@ -152,9 +521,7 @@ std::pair<std::vector<Values>, std::vector<RangesInDat
             return marks_in_current_layer < intersected_parts * 2;
         };
 
-        auto & current_layer = result_layers.emplace_back();
-        /// Map part_idx into index inside layer, used to merge marks from the same part into one reader
-        std::unordered_map<size_t, size_t> part_idx_in_layer;
+        RangesInDataPartsBuilder current_layer_builder(ranges_in_data_parts);
 
         while (rows_in_current_layer < rows_per_layer || layers_intersection_is_too_big() || result_layers.size() == max_layers)
         {
@@ -164,57 +531,52 @@ std::pair<std::vector<Values>, std::vector<RangesInDat
             {
                 auto current = parts_ranges_queue.top();
                 parts_ranges_queue.pop();
-                const auto part_idx = current.range.part_idx;
+                const auto part_index = current.part_index;
 
                 if (current.event == PartsRangesIterator::EventType::RangeEnd)
                 {
-                    const auto & mark = MarkRange{current_part_range_begin[part_idx], current.range.end};
-                    auto it = part_idx_in_layer.emplace(std::make_pair(part_idx, current_layer.size()));
-                    if (it.second)
-                        current_layer.emplace_back(
-                            parts[part_idx].data_part,
-                            parts[part_idx].alter_conversions,
-                            parts[part_idx].part_index_in_query,
-                            MarkRanges{mark});
-                    else
-                        current_layer[it.first->second].ranges.push_back(mark);
-
-                    current_part_range_begin.erase(part_idx);
-                    current_part_range_end.erase(part_idx);
+                    current_layer_builder.addRange(part_index, MarkRange{current_part_range_begin[part_index], current.range.end});
+                    current_part_range_begin.erase(part_index);
+                    current_part_range_end.erase(part_index);
                     continue;
                 }
 
                 last_value = std::move(current.value);
-                rows_in_current_layer += index_access->getMarkRows(part_idx, current.range.begin);
-                marks_in_current_layer++;
-                current_part_range_begin.try_emplace(part_idx, current.range.begin);
-                current_part_range_end[part_idx] = current.range.begin;
+                rows_in_current_layer += index_access.getMarkRows(part_index, current.range.begin);
+                ++marks_in_current_layer;
+
+                current_part_range_begin.try_emplace(part_index, current.range.begin);
+                current_part_range_end[part_index] = current.range.begin;
+
                 if (current.range.begin + 1 < current.range.end)
                 {
-                    current.range.begin++;
-                    current.value = index_access->getValue(part_idx, current.range.begin);
+                    ++current.range.begin;
+                    current.value = index_access.getValue(part_index, current.range.begin);
                     parts_ranges_queue.push(std::move(current));
                 }
             }
+
             if (parts_ranges_queue.empty())
                 break;
+
             if (rows_in_current_layer >= rows_per_layer && !layers_intersection_is_too_big() && result_layers.size() < max_layers)
                 borders.push_back(last_value);
         }
+
         for (const auto & [part_index, last_mark] : current_part_range_end)
         {
-            const auto & mark = MarkRange{current_part_range_begin[part_idx], last_mark + 1};
-            auto it = part_idx_in_layer.emplace(std::make_pair(part_idx, current_layer.size()));
-
-            if (it.second)
-                result_layers.back().emplace_back(
-                    parts[part_idx].data_part, parts[part_idx].alter_conversions, parts[part_idx].part_index_in_query, MarkRanges{mark});
-            else
-                current_layer[it.first->second].ranges.push_back(mark);
-
-            current_part_range_begin[part_idx] = current_part_range_end[part_idx];
+            current_layer_builder.addRange(part_index, MarkRange{current_part_range_begin[part_index], last_mark + 1});
+            current_part_range_begin[part_index] = current_part_range_end[part_index];
        }
+
+        result_layers.push_back(std::move(current_layer_builder.getCurrentRangesInDataParts()));
    }
+
+    std::stable_sort(
+        non_intersecting_ranges_in_data_parts.begin(),
+        non_intersecting_ranges_in_data_parts.end(),
+        [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; });
+
     for (auto & layer : result_layers)
     {
         std::stable_sort(
@@ -223,7 +585,7 @@ std::pair<std::vector<Values>, std::vector<RangesInDat
             [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; });
     }
 
-    return {std::move(borders), std::move(result_layers)};
+    return {std::move(non_intersecting_ranges_in_data_parts), std::move(borders), std::move(result_layers)};
 }
 
@@ -329,44 +691,54 @@ static void reorderColumns(ActionsDAG & dag, const Block & header, const std::st
     dag.getOutputs() = std::move(new_outputs);
 }
 
-Pipes buildPipesForReadingByPKRanges(
+SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey(
     const KeyDescription & primary_key,
     ExpressionActionsPtr sorting_expr,
     RangesInDataParts parts,
     size_t max_layers,
     ContextPtr context,
-    ReadingInOrderStepGetter && reading_step_getter)
+    ReadingInOrderStepGetter && in_order_reading_step_getter)
 {
     if (max_layers <= 1)
         throw Exception(ErrorCodes::LOGICAL_ERROR, "max_layer should be greater than 1");
 
-    auto && [borders, result_layers] = split(std::move(parts), max_layers);
+    SplitResult split_result = split(std::move(parts), max_layers);
+
+    SplitPartsWithRangesByPrimaryKeyResult result;
+    result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges);
+
+    auto borders = std::move(split_result.borders);
+    auto result_layers = std::move(split_result.layers);
+
     auto filters = buildFilters(primary_key, borders);
-    Pipes pipes(result_layers.size());
+
     for (size_t i = 0; i < result_layers.size(); ++i)
     {
-        pipes[i] = reading_step_getter(std::move(result_layers[i]));
-        pipes[i].addSimpleTransform([sorting_expr](const Block & header)
+        Pipe layer_pipe = in_order_reading_step_getter(std::move(result_layers[i]));
+
+        layer_pipe.addSimpleTransform([sorting_expr](const Block & header)
                                       { return std::make_shared<ExpressionTransform>(header, sorting_expr); });
+
         auto & filter_function = filters[i];
         if (!filter_function)
             continue;
+
         auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes());
         auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false);
-        reorderColumns(*actions, pipes[i].getHeader(), filter_function->getColumnName());
+        reorderColumns(*actions, layer_pipe.getHeader(), filter_function->getColumnName());
         ExpressionActionsPtr expression_actions = std::make_shared<ExpressionActions>(std::move(actions));
         auto description = fmt::format(
             "filter values in ({}, {}]", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? ::toString(borders[i]) : "+inf");
-        pipes[i].addSimpleTransform(
+        layer_pipe.addSimpleTransform(
             [&](const Block & header)
             {
                 auto step = std::make_shared<FilterSortedStreamByRange>(header, expression_actions, filter_function->getColumnName(), true);
                 step->setDescription(description);
                 return step;
             });
+
+        result.merging_pipes.push_back(std::move(layer_pipe));
     }
-    return pipes;
+
+    return result;
 }
 
 }
diff --git a/src/Processors/QueryPlan/PartsSplitter.h b/src/Processors/QueryPlan/PartsSplitter.h
index 92ba6191e97..47a2f8b468c 100644
--- a/src/Processors/QueryPlan/PartsSplitter.h
+++ b/src/Processors/QueryPlan/PartsSplitter.h
@@ -13,15 +13,25 @@ namespace DB
 
 using ReadingInOrderStepGetter = std::function<Pipe(RangesInDataParts)>;
 
-/// Splits parts into layers, each layer will contain parts subranges with PK values from its own range.
-/// A separate pipe will be constructed for each layer with a reading step (provided by the reading_step_getter) and
-/// a filter for this layer's range of PK values.
-/// Will try to produce exactly max_layer pipes but may return less if data is distributed in not a very parallelizable way.
-Pipes buildPipesForReadingByPKRanges(
+struct SplitPartsWithRangesByPrimaryKeyResult
+{
+    RangesInDataParts non_intersecting_parts_ranges;
+    Pipes merging_pipes;
+};
+
+/** Splits parts ranges into:
+  *
+  * 1. Non intersecting part ranges, for parts with level > 0.
+  * 2. Merging layers that contain ranges from multiple parts. A separate pipe will be constructed for each layer
+  * with a reading step (provided by the in_order_reading_step_getter) and a filter for this layer's range of PK values.
+  *
+  * Will try to produce exactly max_layer layers but may return less if data is distributed in not a very parallelizable way.
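+  *
+  * Each range in non_intersecting_parts_ranges is disjoint by primary key from every other returned range,
+  * so it can be read with a plain pipe and does not need the FINAL merging transform.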
+ */ +SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( const KeyDescription & primary_key, ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, - ReadingInOrderStepGetter && reading_step_getter); + ReadingInOrderStepGetter && in_order_reading_step_getter); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 875b0d9bdbc..48efe44ed2a 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1072,9 +1072,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( it, parts_with_ranges.end(), [&it](auto & part) { return it->data_part->info.partition_id != part.data_part->info.partition_id; }); parts_to_merge_ranges.push_back(it); } - /// We divide threads for each partition equally. But we will create at least the number of partitions threads. - /// (So, the total number of threads could be more than initial num_streams. - num_streams /= (parts_to_merge_ranges.size() - 1); } else { @@ -1087,8 +1084,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// If do_not_merge_across_partitions_select_final is true and num_streams > 1 /// we will store lonely parts with level > 0 to use parallel select on them. - RangesInDataParts lonely_parts; - size_t sum_marks_in_lonely_parts = 0; + RangesInDataParts non_intersecting_parts_by_primary_key; auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); @@ -1100,32 +1096,26 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( bool no_merging_final = settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && parts_to_merge_ranges[range_index]->data_part->info.level > 0; + if (no_merging_final) + { + non_intersecting_parts_by_primary_key.push_back(std::move(*parts_to_merge_ranges[range_index])); + continue; + } + Pipes pipes; { RangesInDataParts new_parts; - if (no_merging_final) - { - if (num_streams > 1) - sum_marks_in_lonely_parts += parts_to_merge_ranges[range_index]->getMarksCount(); - lonely_parts.push_back(std::move(*parts_to_merge_ranges[range_index])); - continue; - } - else - { - for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) - { - new_parts.emplace_back(part_it->data_part, part_it->alter_conversions, part_it->part_index_in_query, part_it->ranges); - } - } + for (auto part_it = parts_to_merge_ranges[range_index]; part_it != parts_to_merge_ranges[range_index + 1]; ++part_it) + new_parts.emplace_back(part_it->data_part, part_it->alter_conversions, part_it->part_index_in_query, part_it->ranges); if (new_parts.empty()) continue; if (num_streams > 1 && metadata_for_reading->hasPrimaryKey()) { - // Let's split parts into layers to ensure data parallelism of FINAL. - auto reading_step_getter = [this, &column_names, &info](auto parts) + // Let's split parts into non intersecting parts ranges and layers to ensure data parallelism of FINAL. 
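+                // Non intersecting ranges returned by the split can skip the merging transform entirely:
+                // they are accumulated into non_intersecting_parts_by_primary_key and read through
+                // spreadMarkRangesAmongStreams at the end of this function. Only the merging_pipes
+                // built from the intersecting layers still go through FINAL merging below.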
+ auto in_order_reading_step_getter = [this, &column_names, &info](auto parts) { return this->read( std::move(parts), @@ -1136,13 +1126,19 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( info.use_uncompressed_cache); }; - pipes = buildPipesForReadingByPKRanges( + SplitPartsWithRangesByPrimaryKeyResult split_ranges_result = splitPartsWithRangesByPrimaryKey( metadata_for_reading->getPrimaryKey(), sorting_expr, std::move(new_parts), num_streams, context, - std::move(reading_step_getter)); + std::move(in_order_reading_step_getter)); + + for (auto && non_intersecting_parts_range : split_ranges_result.non_intersecting_parts_ranges) + non_intersecting_parts_by_primary_key.push_back(std::move(non_intersecting_parts_range)); + + for (auto && merging_pipe : split_ranges_result.merging_pipes) + pipes.push_back(std::move(merging_pipe)); } else { @@ -1154,10 +1150,12 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( } /// Drop temporary columns, added by 'sorting_key_expr' - if (!out_projection) + if (!out_projection && !pipes.empty()) out_projection = createProjection(pipes.front().getHeader()); } + if (pipes.empty()) + continue; Names sort_columns = metadata_for_reading->getSortingKeyColumns(); SortDescription sort_description; @@ -1183,45 +1181,9 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( merging_pipes.emplace_back(Pipe::unitePipes(std::move(pipes))); } - if (!lonely_parts.empty()) + if (!non_intersecting_parts_by_primary_key.empty()) { - Pipe pipe; - if (num_streams > 1) - { - size_t num_streams_for_lonely_parts = num_streams * lonely_parts.size(); - - const size_t min_marks_for_concurrent_read = MergeTreeDataSelectExecutor::minMarksForConcurrentRead( - settings.merge_tree_min_rows_for_concurrent_read, - settings.merge_tree_min_bytes_for_concurrent_read, - data_settings->index_granularity, - info.index_granularity_bytes, - sum_marks_in_lonely_parts); - - /// Reduce the number of num_streams_for_lonely_parts if the data is small. 
- if (sum_marks_in_lonely_parts < num_streams_for_lonely_parts * min_marks_for_concurrent_read - && lonely_parts.size() < num_streams_for_lonely_parts) - num_streams_for_lonely_parts = std::max( - (sum_marks_in_lonely_parts + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, - lonely_parts.size()); - - pipe = read( - std::move(lonely_parts), - origin_column_names, - ReadFromMergeTree::ReadType::Default, - num_streams_for_lonely_parts, - min_marks_for_concurrent_read, - info.use_uncompressed_cache); - } - else - { - pipe = read( - std::move(lonely_parts), - origin_column_names, - ReadFromMergeTree::ReadType::InOrder, - num_streams, - 0, - info.use_uncompressed_cache); - } + auto pipe = spreadMarkRangesAmongStreams(std::move(non_intersecting_parts_by_primary_key), num_streams, origin_column_names); no_merging_pipes.emplace_back(std::move(pipe)); } diff --git a/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.reference b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.reference new file mode 100644 index 00000000000..59acae1c7ef --- /dev/null +++ b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.reference @@ -0,0 +1,85 @@ +1 +-- +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +-- +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +-- +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +-- +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 +10 10 +11 11 +12 12 +13 13 +14 14 +15 15 +16 16 +17 17 +18 18 +19 19 +20 20 +21 21 +22 22 +23 23 +24 24 +25 25 +26 26 +27 27 +28 28 +29 29 +30 30 +31 31 diff --git a/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql new file mode 100644 index 00000000000..70067bcff74 --- /dev/null +++ b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=ReplacingMergeTree ORDER BY id SETTINGS index_granularity = 2; + +INSERT INTO test_table SELECT 0, '0'; +INSERT INTO test_table SELECT number + 1, number + 1 FROM numbers(15); +OPTIMIZE TABLE test_table; + +SELECT COUNT() FROM system.parts WHERE table = 'test_table' AND active = 1; +SYSTEM STOP MERGES test_table; + +SELECT '--'; + +SELECT id, value FROM test_table FINAL ORDER BY id; + +SELECT '--'; + +INSERT INTO test_table SELECT 5, '5'; +SELECT id, value FROM test_table FINAL ORDER BY id; + +SELECT '--'; + +INSERT INTO test_table SELECT number + 8, number + 8 FROM numbers(8); +SELECT id, value FROM test_table FINAL ORDER BY id; + +SELECT '--'; + +INSERT INTO test_table SELECT number, number FROM numbers(32); +SELECT id, value FROM test_table FINAL ORDER BY id; + +DROP TABLE test_table; From 51d6c444f509651bae0ece9b5e39a07e083d0fda Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 22 Dec 2023 12:32:58 +0300 Subject: [PATCH 15/88] Fixed tests --- src/Processors/QueryPlan/PartsSplitter.cpp | 42 ++++++++++--------- ...tree_final_split_ranges_by_primary_key.sql | 2 +- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 6f49bcce25c..8804ff30bb2 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ 
b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -120,14 +120,6 @@ public: size_t getMarkRows(size_t part_idx, size_t mark) const { return parts[part_idx].data_part->index_granularity.getMarkRows(mark); } - size_t getTotalRowCount() const - { - size_t total = 0; - for (const auto & part : parts) - total += part.getRowsCount(); - return total; - } - private: const RangesInDataParts & parts; }; @@ -423,7 +415,7 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) chassert(current_part_range.event == PartsRangesIterator::EventType::RangeEnd); /** If there are more than 1 part ranges that we are currently processing - * that means that this part range is interesecting with other range. + * that means that this part range is intersecting with other range. * * If part level is 0, we must process whole part because it can contain duplicate primary keys. */ @@ -468,6 +460,16 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) part_index_start_to_range.erase(current_part_range.part_index); } + /// Process parts ranges with undefined value at end mark + bool is_intersecting = part_index_start_to_range.size() > 1; + for (const auto & [part_index, mark_range] : part_index_start_to_range) + { + if (is_intersecting) + add_intersecting_range(part_index, mark_range); + else + add_non_intersecting_range(part_index, mark_range); + } + auto & non_intersecting_ranges_in_data_parts = non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts(); auto & intersecting_ranges_in_data_parts = intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts(); @@ -477,6 +479,7 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) // of granules with values greater than the previous mark and less or equal than the new border. 
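+    // The queue is keyed by the primary key value at a range boundary, so popping always yields the
+    // globally smallest unprocessed PK value across all intersecting parts; the RangeStart/RangeEnd
+    // events track which part ranges are currently open when a candidate border is considered.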
std::priority_queue, std::greater<>> parts_ranges_queue; + for (size_t part_index = 0; part_index < intersecting_ranges_in_data_parts.size(); ++part_index) { size_t initial_part_index = intersecting_ranges_in_data_parts_builder.mapPartIndexToInitialPartIndex(part_index); @@ -505,7 +508,8 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) std::vector borders; std::vector result_layers; - const size_t rows_per_layer = std::max(index_access.getTotalRowCount() / max_layers, 1); + size_t total_intersecting_rows_count = intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts().getRowsCountAllParts(); + const size_t rows_per_layer = std::max(total_intersecting_rows_count / max_layers, 1); while (!parts_ranges_queue.empty()) { @@ -703,18 +707,18 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( throw Exception(ErrorCodes::LOGICAL_ERROR, "max_layer should be greater than 1"); SplitResult split_result = split(std::move(parts), max_layers); - - SplitPartsWithRangesByPrimaryKeyResult result; - result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); - auto borders = std::move(split_result.borders); auto result_layers = std::move(split_result.layers); auto filters = buildFilters(primary_key, borders); + SplitPartsWithRangesByPrimaryKeyResult result; + result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); + result.merging_pipes.resize(result_layers.size()); + for (size_t i = 0; i < result_layers.size(); ++i) { - Pipe layer_pipe = in_order_reading_step_getter(std::move(result_layers[i])); - layer_pipe.addSimpleTransform([sorting_expr](const Block & header) + result.merging_pipes[i] = in_order_reading_step_getter(std::move(result_layers[i])); + result.merging_pipes[i].addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); }); auto & filter_function = filters[i]; @@ -723,19 +727,17 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( auto syntax_result = TreeRewriter(context).analyze(filter_function, primary_key.expression->getRequiredColumnsWithTypes()); auto actions = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false); - reorderColumns(*actions, layer_pipe.getHeader(), filter_function->getColumnName()); + reorderColumns(*actions, result.merging_pipes[i].getHeader(), filter_function->getColumnName()); ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in ({}, {}]", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); - layer_pipe.addSimpleTransform( + result.merging_pipes[i].addSimpleTransform( [&](const Block & header) { auto step = std::make_shared(header, expression_actions, filter_function->getColumnName(), true); step->setDescription(description); return step; }); - - result.merging_pipes.push_back(std::move(layer_pipe)); } return result; diff --git a/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql index 70067bcff74..780ed5b7984 100644 --- a/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql +++ b/tests/queries/0_stateless/02946_merge_tree_final_split_ranges_by_primary_key.sql @@ -9,7 +9,7 @@ INSERT INTO test_table SELECT 0, '0'; INSERT INTO test_table SELECT number + 1, number + 1 FROM numbers(15); OPTIMIZE TABLE test_table; -SELECT COUNT() FROM system.parts WHERE table = 'test_table' AND active = 1; +SELECT COUNT() FROM system.parts WHERE database = currentDatabase() AND table = 'test_table' AND active = 1; SYSTEM STOP MERGES test_table; SELECT '--'; From 46446ed0c473a4f11cde67f55bf449f845fd6b75 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 22 Dec 2023 17:31:36 +0300 Subject: [PATCH 16/88] Fixed tests --- src/Processors/QueryPlan/PartsSplitter.cpp | 93 ++++++++++--------- .../02286_parallel_final.reference | 14 ++- .../0_stateless/02286_parallel_final.sh | 16 +++- ...75_final_invalid_read_ranges_bug.reference | 27 ++++++ .../02875_final_invalid_read_ranges_bug.sql | 13 ++- 5 files changed, 110 insertions(+), 53 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 8804ff30bb2..590772f6f60 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -152,10 +152,6 @@ public: return ranges_in_data_parts; } - size_t mapPartIndexToInitialPartIndex(size_t part_index) const - { - return part_index_to_initial_ranges_in_data_parts_index.at(part_index); - } private: std::unordered_map part_index_to_current_ranges_in_data_parts_index; std::unordered_map part_index_to_initial_ranges_in_data_parts_index; @@ -179,20 +175,22 @@ struct PartsRangesIterator else if (compare_result == 1) return false; - // RangeStart event always before RangeEnd event - if (event != other.event) - return event < other.event; + if (part_index == other.part_index) + { + /// Within the same part we should process events in order of mark numbers, + /// because they already ordered by value and range ends have greater mark numbers than the beginnings. + /// Otherwise we could get invalid ranges with the right bound that is less than the left bound. + const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end; + const auto other_ev_mark = other.event == EventType::RangeStart ? other.range.begin : other.range.end; - /// Within the same part we should process events in order of mark numbers, - /// because they already ordered by value and range ends have greater mark numbers than the beginnings. - /// Otherwise we could get invalid ranges with the right bound that is less than the left bound. - const auto ev_mark = event == EventType::RangeStart ? range.begin : range.end; - const auto other_ev_mark = other.event == EventType::RangeStart ? 
other.range.begin : other.range.end; + // Start event always before end event + if (ev_mark == other_ev_mark) + return event < other.event; - if (ev_mark == other_ev_mark) - return part_index < other.part_index; + return ev_mark < other_ev_mark; + } - return ev_mark < other_ev_mark; + return part_index < other.part_index; } [[maybe_unused]] bool operator==(const PartsRangesIterator & other) const @@ -221,14 +219,13 @@ struct PartsRangesIterator EventType event; }; -struct SplitResult +struct SplitPartsRangesResult { RangesInDataParts non_intersecting_parts_ranges; - std::vector borders; - std::vector layers; + RangesInDataParts intersecting_parts_ranges; }; -SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) +SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts) { /** Split ranges in data parts into intersecting ranges in data parts and non intersecting ranges in data parts. * @@ -470,32 +467,46 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) add_non_intersecting_range(part_index, mark_range); } - auto & non_intersecting_ranges_in_data_parts = non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts(); - auto & intersecting_ranges_in_data_parts = intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts(); + auto && non_intersecting_ranges_in_data_parts = std::move(non_intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); + auto && intersecting_ranges_in_data_parts = std::move(intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts()); + std::stable_sort( + non_intersecting_ranges_in_data_parts.begin(), + non_intersecting_ranges_in_data_parts.end(), + [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; }); + + std::stable_sort( + intersecting_ranges_in_data_parts.begin(), + intersecting_ranges_in_data_parts.end(), + [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; }); + + return {std::move(non_intersecting_ranges_in_data_parts), std::move(intersecting_ranges_in_data_parts)}; +} + +std::pair, std::vector> splitIntersectingPartsRangesIntoLayers(RangesInDataParts intersecting_ranges_in_data_parts, size_t max_layers) +{ // We will advance the iterator pointing to the mark with the smallest PK value until // there will be not less than rows_per_layer rows in the current layer (roughly speaking). // Then we choose the last observed value as the new border, so the current layer will consists // of granules with values greater than the previous mark and less or equal than the new border. 
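+    // A layer is closed once it has accumulated roughly total_intersecting_rows_count / max_layers rows
+    // (rows_per_layer below), unless closing it would leave too many granules shared with the next
+    // layer (see layers_intersection_is_too_big in the loop body).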
+ IndexAccess index_access(intersecting_ranges_in_data_parts); std::priority_queue, std::greater<>> parts_ranges_queue; for (size_t part_index = 0; part_index < intersecting_ranges_in_data_parts.size(); ++part_index) { - size_t initial_part_index = intersecting_ranges_in_data_parts_builder.mapPartIndexToInitialPartIndex(part_index); - for (const auto & range : intersecting_ranges_in_data_parts[part_index].ranges) { const auto & index_granularity = intersecting_ranges_in_data_parts[part_index].data_part->index_granularity; parts_ranges_queue.push( - {index_access.getValue(initial_part_index, range.begin), range, initial_part_index, PartsRangesIterator::EventType::RangeStart}); + {index_access.getValue(part_index, range.begin), range, part_index, PartsRangesIterator::EventType::RangeStart}); const bool value_is_defined_at_end_mark = range.end < index_granularity.getMarksCount(); if (!value_is_defined_at_end_mark) continue; parts_ranges_queue.push( - {index_access.getValue(initial_part_index, range.end), range, initial_part_index, PartsRangesIterator::EventType::RangeEnd}); + {index_access.getValue(part_index, range.end), range, part_index, PartsRangesIterator::EventType::RangeEnd}); } } @@ -508,7 +519,7 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) std::vector borders; std::vector result_layers; - size_t total_intersecting_rows_count = intersecting_ranges_in_data_parts_builder.getCurrentRangesInDataParts().getRowsCountAllParts(); + size_t total_intersecting_rows_count = intersecting_ranges_in_data_parts.getRowsCountAllParts(); const size_t rows_per_layer = std::max(total_intersecting_rows_count / max_layers, 1); while (!parts_ranges_queue.empty()) @@ -525,7 +536,8 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) return marks_in_current_layer < intersected_parts * 2; }; - RangesInDataPartsBuilder current_layer_builder(ranges_in_data_parts); + RangesInDataPartsBuilder current_layer_builder(intersecting_ranges_in_data_parts); + result_layers.emplace_back(); while (rows_in_current_layer < rows_per_layer || layers_intersection_is_too_big() || result_layers.size() == max_layers) { @@ -573,14 +585,9 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) current_part_range_begin[part_index] = current_part_range_end[part_index]; } - result_layers.push_back(std::move(current_layer_builder.getCurrentRangesInDataParts())); + result_layers.back() = std::move(current_layer_builder.getCurrentRangesInDataParts()); } - std::stable_sort( - non_intersecting_ranges_in_data_parts.begin(), - non_intersecting_ranges_in_data_parts.end(), - [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; }); - for (auto & layer : result_layers) { std::stable_sort( @@ -589,7 +596,7 @@ SplitResult split(RangesInDataParts ranges_in_data_parts, size_t max_layers) [](const auto & lhs, const auto & rhs) { return lhs.part_index_in_query < rhs.part_index_in_query; }); } - return {std::move(non_intersecting_ranges_in_data_parts), std::move(borders), std::move(result_layers)}; + return {std::move(result_layers), std::move(borders)}; } @@ -706,18 +713,18 @@ SplitPartsWithRangesByPrimaryKeyResult splitPartsWithRangesByPrimaryKey( if (max_layers <= 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "max_layer should be greater than 1"); - SplitResult split_result = split(std::move(parts), max_layers); - auto borders = std::move(split_result.borders); - auto result_layers = std::move(split_result.layers); - auto 
filters = buildFilters(primary_key, borders); - SplitPartsWithRangesByPrimaryKeyResult result; - result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); - result.merging_pipes.resize(result_layers.size()); - for (size_t i = 0; i < result_layers.size(); ++i) + SplitPartsRangesResult split_result = splitPartsRanges(std::move(parts)); + result.non_intersecting_parts_ranges = std::move(split_result.non_intersecting_parts_ranges); + + auto && [layers, borders] = splitIntersectingPartsRangesIntoLayers(std::move(split_result.intersecting_parts_ranges), max_layers); + auto filters = buildFilters(primary_key, borders); + result.merging_pipes.resize(layers.size()); + + for (size_t i = 0; i < layers.size(); ++i) { - result.merging_pipes[i] = in_order_reading_step_getter(std::move(result_layers[i])); + result.merging_pipes[i] = in_order_reading_step_getter(std::move(layers[i])); result.merging_pipes[i].addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); }); diff --git a/tests/queries/0_stateless/02286_parallel_final.reference b/tests/queries/0_stateless/02286_parallel_final.reference index f6573cb9042..5801fb46908 100644 --- a/tests/queries/0_stateless/02286_parallel_final.reference +++ b/tests/queries/0_stateless/02286_parallel_final.reference @@ -1,9 +1,13 @@ +Test intersecting ranges 2 2 3 5 -8 -8 -8 -8 -8 +Test intersecting ranges finished +Test non intersecting ranges +0 +0 +0 +0 +0 +Test non intersecting ranges finished diff --git a/tests/queries/0_stateless/02286_parallel_final.sh b/tests/queries/0_stateless/02286_parallel_final.sh index de0cca0e966..90c9758142c 100755 --- a/tests/queries/0_stateless/02286_parallel_final.sh +++ b/tests/queries/0_stateless/02286_parallel_final.sh @@ -5,13 +5,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +echo "Test intersecting ranges" + test_random_values() { layers=$1 $CLICKHOUSE_CLIENT -n -q " + drop table if exists tbl_8parts_${layers}granules_rnd; create table tbl_8parts_${layers}granules_rnd (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 % 8); insert into tbl_8parts_${layers}granules_rnd select number, 1 from numbers_mt($((layers * 8 * 8192))); optimize table tbl_8parts_${layers}granules_rnd final; - explain pipeline select * from tbl_8parts_${layers}granules_rnd final settings max_threads = 16;" 2>&1 | + explain pipeline select * from tbl_8parts_${layers}granules_rnd final settings max_threads = 16; + drop table tbl_8parts_${layers}granules_rnd;" 2>&1 | grep -c "CollapsingSortedTransform" } @@ -19,16 +23,24 @@ for layers in 2 3 5 8; do test_random_values $layers done; +echo "Test intersecting ranges finished" + +echo "Test non intersecting ranges" + test_sequential_values() { layers=$1 $CLICKHOUSE_CLIENT -n -q " + drop table if exists tbl_8parts_${layers}granules_seq; create table tbl_8parts_${layers}granules_seq (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 / $((layers * 8192)))::UInt64; insert into tbl_8parts_${layers}granules_seq select number, 1 from numbers_mt($((layers * 8 * 8192))); optimize table tbl_8parts_${layers}granules_seq final; - explain pipeline select * from tbl_8parts_${layers}granules_seq final settings max_threads = 8;" 2>&1 | + explain pipeline select * from tbl_8parts_${layers}granules_seq final settings max_threads = 8; + drop table tbl_8parts_${layers}granules_seq;" 2>&1 | grep -c "CollapsingSortedTransform" } for layers in 2 3 5 8 16; do test_sequential_values $layers done; + +echo "Test non intersecting ranges finished" diff --git a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference index 573541ac970..10fcc44daed 100644 --- a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference +++ b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.reference @@ -1 +1,28 @@ +5879429 2023-07-01 03:50:35 2023-07-01 03:50:35 -278 +5881397 2023-07-01 06:22:26 2023-07-01 06:22:27 2807 +5925060 2023-07-04 00:24:03 2023-07-04 00:24:02 -12 +5936591 2023-07-04 07:37:19 2023-07-04 07:37:18 -12 +5940709 2023-07-04 09:13:35 2023-07-04 09:13:35 2820 +5942342 2023-07-04 09:58:00 2023-07-04 09:57:59 -12 +5952231 2023-07-04 22:33:24 2023-07-04 22:33:24 1692 +5959449 2023-07-05 04:32:55 2023-07-05 04:32:54 -12 +5963240 2023-07-05 06:37:08 2023-07-05 06:37:09 1709 +5965742 2023-07-05 07:27:01 2023-07-05 07:27:02 1709 +5969948 2023-07-05 08:44:36 2023-07-05 08:44:37 2278 +5971673 2023-07-05 09:14:09 2023-07-05 09:14:09 5695 +6012987 2023-07-06 20:52:28 2023-07-06 20:52:27 -536 +0 +5879429 2023-07-01 03:50:35 2023-07-01 03:50:35 -278 +5881397 2023-07-01 06:22:26 2023-07-01 06:22:27 2807 +5925060 2023-07-04 00:24:03 2023-07-04 00:24:02 -12 +5936591 2023-07-04 07:37:19 2023-07-04 07:37:18 -12 +5940709 2023-07-04 09:13:35 2023-07-04 09:13:35 2820 +5942342 2023-07-04 09:58:00 2023-07-04 09:57:59 -12 +5952231 2023-07-04 22:33:24 2023-07-04 22:33:24 1692 +5959449 2023-07-05 04:32:55 2023-07-05 04:32:54 -12 +5963240 2023-07-05 06:37:08 2023-07-05 06:37:09 1709 +5965742 2023-07-05 07:27:01 2023-07-05 07:27:02 1709 +5969948 2023-07-05 08:44:36 2023-07-05 08:44:37 2278 +5971673 2023-07-05 09:14:09 2023-07-05 09:14:09 5695 +6012987 2023-07-06 20:52:28 
2023-07-06 20:52:27 -536 0 diff --git a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql index 4e91c2e3167..5557c572696 100644 --- a/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql +++ b/tests/queries/0_stateless/02875_final_invalid_read_ranges_bug.sql @@ -1,3 +1,4 @@ +DROP TABLE IF EXISTS t; CREATE TABLE t ( tid UInt64, @@ -13,8 +14,14 @@ SETTINGS index_granularity = 1; INSERT INTO t VALUES (5879429,'2023-07-01 03:50:35','2023-07-01 03:50:35',-278) (5881397,'2023-07-01 06:22:26','2023-07-01 06:22:27',2807) (5925060,'2023-07-04 00:24:03','2023-07-04 00:24:02',-12) (5936591,'2023-07-04 07:37:19','2023-07-04 07:37:18',-12) (5940709,'2023-07-04 09:13:35','2023-07-04 09:13:35',2820) (5942342,'2023-07-04 09:58:00','2023-07-04 09:57:59',-12) (5952231,'2023-07-04 22:33:24','2023-07-04 22:33:24',1692) (5959449,'2023-07-05 04:32:55','2023-07-05 04:32:54',-12) (5963240,'2023-07-05 06:37:08','2023-07-05 06:37:09',1709) (5965742,'2023-07-05 07:27:01','2023-07-05 07:27:02',1709) (5969948,'2023-07-05 08:44:36','2023-07-05 08:44:37',2278) (5971673,'2023-07-05 09:14:09','2023-07-05 09:14:09',5695) (6012987,'2023-07-06 20:52:28','2023-07-06 20:52:27',-536); -SELECT sum(amount) -FROM t FINAL -WHERE (processed_at >= '2023-09-19 00:00:00') AND (processed_at <= '2023-09-20 01:00:00'); +SELECT tid, processed_at, created_at, amount FROM t FINAL ORDER BY tid; + +SELECT sum(amount) FROM t FINAL WHERE (processed_at >= '2023-09-19 00:00:00') AND (processed_at <= '2023-09-20 01:00:00'); + +INSERT INTO t VALUES (5879429,'2023-07-01 03:50:35','2023-07-01 03:50:35',-278) (5881397,'2023-07-01 06:22:26','2023-07-01 06:22:27',2807) (5925060,'2023-07-04 00:24:03','2023-07-04 00:24:02',-12) (5936591,'2023-07-04 07:37:19','2023-07-04 07:37:18',-12) (5940709,'2023-07-04 09:13:35','2023-07-04 09:13:35',2820) (5942342,'2023-07-04 09:58:00','2023-07-04 09:57:59',-12) (5952231,'2023-07-04 22:33:24','2023-07-04 22:33:24',1692) (5959449,'2023-07-05 04:32:55','2023-07-05 04:32:54',-12) (5963240,'2023-07-05 06:37:08','2023-07-05 06:37:09',1709) (5965742,'2023-07-05 07:27:01','2023-07-05 07:27:02',1709) (5969948,'2023-07-05 08:44:36','2023-07-05 08:44:37',2278) (5971673,'2023-07-05 09:14:09','2023-07-05 09:14:09',5695) (6012987,'2023-07-06 20:52:28','2023-07-06 20:52:27',-536); + +SELECT tid, processed_at, created_at, amount FROM t FINAL ORDER BY tid; + +SELECT sum(amount) FROM t FINAL WHERE (processed_at >= '2023-09-19 00:00:00') AND (processed_at <= '2023-09-20 01:00:00'); DROP TABLE t; From c5959068cb694125dbf34117f60c4e2a1aa92eab Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 23 Dec 2023 14:50:27 +0300 Subject: [PATCH 17/88] Fixed tests --- src/Processors/QueryPlan/PartsSplitter.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 590772f6f60..a4b5cf60102 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -46,7 +46,6 @@ int compareValues(const Values & lhs, const Values & rhs) return 0; } - /// Adaptor to access PK values from index. 
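+/// The values come from each part's in-memory primary index, one entry per mark; a range's end mark can
+/// fall past the last indexed mark, which is why callers check value_is_defined_at_end_mark before use.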
class IndexAccess { @@ -190,7 +189,11 @@ struct PartsRangesIterator return ev_mark < other_ev_mark; } - return part_index < other.part_index; + if (event == other.event) + return part_index < other.part_index; + + // Start event always before end event + return event < other.event; } [[maybe_unused]] bool operator==(const PartsRangesIterator & other) const @@ -323,7 +326,7 @@ SplitPartsRangesResult splitPartsRanges(RangesInDataParts ranges_in_data_parts) std::unordered_map part_index_start_to_range; - chassert(parts_ranges.size() > 1); + chassert(!parts_ranges.empty()); chassert(parts_ranges[0].event == PartsRangesIterator::EventType::RangeStart); part_index_start_to_range[parts_ranges[0].part_index] = parts_ranges[0].range; From 2765acfe45e5bb0efda452ffcafc382cab207da7 Mon Sep 17 00:00:00 2001 From: Pengyuan Bian Date: Tue, 26 Dec 2023 07:50:46 +0000 Subject: [PATCH 18/88] wip test. --- .../__init__.py | 0 .../configs/config.xml | 13 +++ .../configs/settings.xml | 12 +++ .../test.py | 93 +++++++++++++++++++ 4 files changed, 118 insertions(+) create mode 100644 tests/integration/test_attach_table_from_s3_plain_readonly/__init__.py create mode 100644 tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml create mode 100644 tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml create mode 100644 tests/integration/test_attach_table_from_s3_plain_readonly/test.py diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/__init__.py b/tests/integration/test_attach_table_from_s3_plain_readonly/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml new file mode 100644 index 00000000000..1a62adf2d6e --- /dev/null +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml @@ -0,0 +1,13 @@ + + + + + s3_plain + http://minio1:9001/root/data/disks/disk_s3_plain/ + minio + minio123 + true + + + + diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml new file mode 100644 index 00000000000..3e6d615557d --- /dev/null +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/settings.xml @@ -0,0 +1,12 @@ + + + + 1 + + + + + default + + + diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/test.py b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py new file mode 100644 index 00000000000..724c87effe1 --- /dev/null +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py @@ -0,0 +1,93 @@ +import re +import os +import logging +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry +from minio.error import S3Error +from pathlib import Path + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/config.xml"], + user_configs=["configs/settings.xml"], + with_zookeeper=True, + with_minio=True, + stay_alive=True, + macros={"shard": 1, "replica": 1}, +) + +uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") + +def upload_to_minio(minio_client, bucket_name, local_path, minio_path=''): + local_path = Path(local_path) + for root, _, files in os.walk(local_path): + for file in files: + local_file_path = Path(root) / file + 
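+            # Key each object by its path relative to the local root so the bucket layout mirrors
+            # the on-disk store/ layout that the s3_plain disk expects to find.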
minio_object_name = minio_path + str(local_file_path.relative_to(local_path)) + + try: + with open(local_file_path, 'rb') as data: + file_stat = os.stat(local_file_path) + minio_client.put_object(bucket_name, minio_object_name, data, file_stat.st_size) + logging.info(f'Uploaded {local_file_path} to {minio_object_name}') + except S3Error as e: + logging.info(f'Error uploading {local_file_path}: {e}') + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_attach_table_from_s3_plain_readonly(started_cluster): + node.query( + """ + create database local_db; + + create table local_db.test_table (num UInt32) engine=MergeTree() order by num; + + insert into local_db.test_table (*) Values (5) + """ + ) + + assert int(node.query("select num from local_db.test_table limit 1")) == 5 + + # Copy local file into minio bucket + table_data_path = os.path.join(node.path, f"database/store") + minio = cluster.minio_client + upload_to_minio(minio, cluster.minio_bucket, table_data_path, "data/disks/disk_s3_plain/") + + ### remove + s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for s3_object in s3_objects: + logging.info("bianpengyuan Existing S3 object: %s", s3_object.object_name) + ### remove + + # Create a replicated database, and attach the merge tree data disk + table_uuid = node.query("SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'").strip() + node.query( + f""" + drop table local_db.test_table SYNC; + + create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica1'); + attach table s3_plain_test_db.test_table UUID '{table_uuid}' (num UInt32) + engine=MergeTree() + order by num + settings + disk=disk(type=s3_plain, + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/', + access_key_id='minio', + secret_access_key='minio123'); + """ + ) + + assert int(node.query("select num from s3_plain_test_db.test_table limit 1")) == 5 From 76abd321963761059cff34fc2a176ed888ee32eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 26 Dec 2023 20:09:34 +0100 Subject: [PATCH 19/88] Speedup MIN and MAX for native types --- .../AggregateFunctionMax.cpp | 114 ++++++++++- .../AggregateFunctionMin.cpp | 114 ++++++++++- .../AggregateFunctionMinMaxAny.h | 23 ++- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- src/AggregateFunctions/findNumeric.cpp | 15 ++ src/AggregateFunctions/findNumeric.h | 154 ++++++++++++++ src/Common/TargetSpecific.h | 27 +++ .../02406_minmax_behaviour.reference | 192 ++++++++++++++++++ .../0_stateless/02406_minmax_behaviour.sql | 140 +++++++++++++ 9 files changed, 771 insertions(+), 10 deletions(-) create mode 100644 src/AggregateFunctions/findNumeric.cpp create mode 100644 src/AggregateFunctions/findNumeric.h create mode 100644 tests/queries/0_stateless/02406_minmax_behaviour.reference create mode 100644 tests/queries/0_stateless/02406_minmax_behaviour.sql diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp index 813129e42ec..069e80898d7 100644 --- a/src/AggregateFunctions/AggregateFunctionMax.cpp +++ b/src/AggregateFunctions/AggregateFunctionMax.cpp @@ -1,7 +1,7 @@ #include -#include #include - +#include +#include namespace DB { @@ -10,10 +10,118 @@ struct Settings; namespace { +template +class AggregateFunctionsSingleValueMax final : public AggregateFunctionsSingleValue +{ + using Parent = 
AggregateFunctionsSingleValue; + +public: + explicit AggregateFunctionsSingleValueMax(const DataTypePtr & type) : Parent(type) { } + + /// Specializations for native numeric types + ALWAYS_INLINE inline void addBatchSinglePlace( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + Arena * arena, + ssize_t if_argument_pos) const override; + + ALWAYS_INLINE inline void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + const UInt8 * __restrict null_map, + Arena * arena, + ssize_t if_argument_pos) const override; +}; + +#define SPECIALIZE(TYPE) \ +template <> \ +void AggregateFunctionsSingleValueMax>>::addBatchSinglePlace( \ + size_t row_begin, \ + size_t row_end, \ + AggregateDataPtr __restrict place, \ + const IColumn ** __restrict columns, \ + Arena *, \ + ssize_t if_argument_pos) const \ +{ \ + const auto & column = assert_cast>::ColVecType &>(*columns[0]); \ + std::optional opt; \ + if (if_argument_pos >= 0) \ + { \ + const auto & flags = assert_cast(*columns[if_argument_pos]).getData(); \ + opt = findNumericMaxIf(column.getData().data(), flags.data(), row_begin, row_end); \ + } \ + else \ + opt = findNumericMax(column.getData().data(), row_begin, row_end); \ + if (opt.has_value()) \ + this->data(place).changeIfGreater(opt.value()); \ +} + + FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +#undef SPECIALIZE + +template +void AggregateFunctionsSingleValueMax::addBatchSinglePlace( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + Arena * arena, + ssize_t if_argument_pos) const +{ + return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); +} + +#define SPECIALIZE(TYPE) \ +template <> \ +void AggregateFunctionsSingleValueMax>>::addBatchSinglePlaceNotNull( \ + size_t row_begin, \ + size_t row_end, \ + AggregateDataPtr __restrict place, \ + const IColumn ** __restrict columns, \ + const UInt8 * __restrict null_map, \ + Arena *, \ + ssize_t if_argument_pos) const \ +{ \ + const auto & column = assert_cast>::ColVecType &>(*columns[0]); \ + std::optional opt; \ + if (if_argument_pos >= 0) \ + { \ + const auto * if_flags = assert_cast(*columns[if_argument_pos]).getData().data(); \ + auto final_flags = std::make_unique(row_end); \ + for (size_t i = row_begin; i < row_end; ++i) \ + final_flags[i] = (!null_map[i]) & !!if_flags[i]; \ + opt = findNumericMaxIf(column.getData().data(), final_flags.get(), row_begin, row_end); \ + } \ + else \ + opt = findNumericMaxNotNull(column.getData().data(), null_map, row_begin, row_end); \ + if (opt.has_value()) \ + this->data(place).changeIfGreater(opt.value()); \ +} + + FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +#undef SPECIALIZE + +template +void AggregateFunctionsSingleValueMax::addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + const UInt8 * __restrict null_map, + Arena * arena, + ssize_t if_argument_pos) const +{ + return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos); +} + AggregateFunctionPtr createAggregateFunctionMax( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { - return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); + return 
AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); } AggregateFunctionPtr createAggregateFunctionArgMax( diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp index ac3e05121f7..2312d639363 100644 --- a/src/AggregateFunctions/AggregateFunctionMin.cpp +++ b/src/AggregateFunctions/AggregateFunctionMin.cpp @@ -1,6 +1,7 @@ #include -#include #include +#include +#include namespace DB @@ -10,10 +11,119 @@ struct Settings; namespace { +template +class AggregateFunctionsSingleValueMin final : public AggregateFunctionsSingleValue +{ + using Parent = AggregateFunctionsSingleValue; + +public: + explicit AggregateFunctionsSingleValueMin(const DataTypePtr & type) : Parent(type) { } + + /// Specializations for native numeric types + ALWAYS_INLINE inline void addBatchSinglePlace( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + Arena * arena, + ssize_t if_argument_pos) const override; + + ALWAYS_INLINE inline void addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + const UInt8 * __restrict null_map, + Arena * arena, + ssize_t if_argument_pos) const override; +}; + +#define SPECIALIZE(TYPE) \ + template <> \ + void AggregateFunctionsSingleValueMin>>::addBatchSinglePlace( \ + size_t row_begin, \ + size_t row_end, \ + AggregateDataPtr __restrict place, \ + const IColumn ** __restrict columns, \ + Arena *, \ + ssize_t if_argument_pos) const \ + { \ + const auto & column = assert_cast>::ColVecType &>(*columns[0]); \ + std::optional opt; \ + if (if_argument_pos >= 0) \ + { \ + const auto & flags = assert_cast(*columns[if_argument_pos]).getData(); \ + opt = findNumericMinIf(column.getData().data(), flags.data(), row_begin, row_end); \ + } \ + else \ + opt = findNumericMin(column.getData().data(), row_begin, row_end); \ + if (opt.has_value()) \ + this->data(place).changeIfLess(opt.value()); \ + } + +FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +#undef SPECIALIZE + +template +void AggregateFunctionsSingleValueMin::addBatchSinglePlace( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + Arena * arena, + ssize_t if_argument_pos) const +{ + return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); +} + +#define SPECIALIZE(TYPE) \ + template <> \ + void AggregateFunctionsSingleValueMin>>::addBatchSinglePlaceNotNull( \ + size_t row_begin, \ + size_t row_end, \ + AggregateDataPtr __restrict place, \ + const IColumn ** __restrict columns, \ + const UInt8 * __restrict null_map, \ + Arena *, \ + ssize_t if_argument_pos) const \ + { \ + const auto & column = assert_cast>::ColVecType &>(*columns[0]); \ + std::optional opt; \ + if (if_argument_pos >= 0) \ + { \ + const auto * if_flags = assert_cast(*columns[if_argument_pos]).getData().data(); \ + auto final_flags = std::make_unique(row_end); \ + for (size_t i = row_begin; i < row_end; ++i) \ + final_flags[i] = (!null_map[i]) & !!if_flags[i]; \ + opt = findNumericMinIf(column.getData().data(), final_flags.get(), row_begin, row_end); \ + } \ + else \ + opt = findNumericMinNotNull(column.getData().data(), null_map, row_begin, row_end); \ + if (opt.has_value()) \ + this->data(place).changeIfLess(opt.value()); \ + } + +FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +#undef SPECIALIZE + +template +void 
AggregateFunctionsSingleValueMin::addBatchSinglePlaceNotNull( + size_t row_begin, + size_t row_end, + AggregateDataPtr __restrict place, + const IColumn ** __restrict columns, + const UInt8 * __restrict null_map, + Arena * arena, + ssize_t if_argument_pos) const +{ + return Parent::addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, if_argument_pos); +} + AggregateFunctionPtr createAggregateFunctionMin( const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) { - return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); + return AggregateFunctionPtr(createAggregateFunctionSingleValue( + name, argument_types, parameters, settings)); } AggregateFunctionPtr createAggregateFunctionArgMin( diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index ef1de76df79..0457b3d7d6f 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -43,14 +43,12 @@ namespace ErrorCodes template struct SingleValueDataFixed { -private: using Self = SingleValueDataFixed; using ColVecType = ColumnVectorOrDecimal; bool has_value = false; /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf. T value = T{}; -public: static constexpr bool result_is_nullable = false; static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; @@ -157,6 +155,15 @@ public: return false; } + void changeIfLess(T from) + { + if (!has() || from < value) + { + has_value = true; + value = from; + } + } + bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena) { if (!has() || assert_cast(column).getData()[row_num] > value) @@ -179,6 +186,15 @@ public: return false; } + void changeIfGreater(T & from) + { + if (!has() || from > value) + { + has_value = true; + value = from; + } + } + bool isEqualTo(const Self & to) const { return has() && to.value == value; @@ -448,7 +464,6 @@ public: } #endif - }; struct Compatibility @@ -1214,7 +1229,7 @@ struct AggregateFunctionAnyHeavyData : Data template -class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> +class AggregateFunctionsSingleValue : public IAggregateFunctionDataHelper> { static constexpr bool is_any = Data::is_any; diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index b3006f2ce82..5781ab69c6b 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -504,7 +504,7 @@ public: const auto * if_flags = assert_cast(*columns[if_argument_pos]).getData().data(); auto final_flags = std::make_unique(row_end); for (size_t i = row_begin; i < row_end; ++i) - final_flags[i] = (!null_map[i]) & if_flags[i]; + final_flags[i] = (!null_map[i]) & !!if_flags[i]; this->data(place).addManyConditional(column.getData().data(), final_flags.get(), row_begin, row_end); } diff --git a/src/AggregateFunctions/findNumeric.cpp b/src/AggregateFunctions/findNumeric.cpp new file mode 100644 index 00000000000..bbad8c1fe3d --- /dev/null +++ b/src/AggregateFunctions/findNumeric.cpp @@ -0,0 +1,15 @@ +#include + +namespace DB +{ +#define INSTANTIATION(T) \ + template std::optional findNumericMin(const T * __restrict ptr, size_t start, size_t end); \ + template std::optional findNumericMinNotNull(const T * __restrict ptr, const 
UInt8 * __restrict condition_map, size_t start, size_t end); \ + template std::optional findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \ + template std::optional findNumericMax(const T * __restrict ptr, size_t start, size_t end); \ + template std::optional findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \ + template std::optional findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); + +FOR_BASIC_NUMERIC_TYPES(INSTANTIATION) +#undef INSTANTIATION +} diff --git a/src/AggregateFunctions/findNumeric.h b/src/AggregateFunctions/findNumeric.h new file mode 100644 index 00000000000..df7c325569a --- /dev/null +++ b/src/AggregateFunctions/findNumeric.h @@ -0,0 +1,154 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ +template +concept is_any_native_number = (is_any_of); + +template +struct MinComparator +{ + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::min(a, b); } +}; + +template +struct MaxComparator +{ + static ALWAYS_INLINE inline const T & cmp(const T & a, const T & b) { return std::max(a, b); } +}; + +MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(template static std::optional NO_INLINE), + findNumericExtremeImpl, + MULTITARGET_FUNCTION_BODY((const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t row_begin, size_t row_end) + { + size_t count = row_end - row_begin; + ptr += row_begin; + if constexpr (!add_all_elements) + condition_map += row_begin; + + T ret{}; + size_t i = 0; + for (; i < count; i++) + { + if (add_all_elements || !condition_map[i] == add_if_cond_zero) + { + ret = ptr[i]; + break; + } + } + if (i >= count) + return std::nullopt; + + /// Unroll the loop manually for floating point, since the compiler doesn't do it without fastmath + /// as it might change the return value + if constexpr (std::is_floating_point_v) + { + constexpr size_t unroll_block = 512 / sizeof(T); /// Chosen via benchmarks with AVX2 so YMMV + size_t unrolled_end = i + (((count - i) / unroll_block) * unroll_block); + + if (i < unrolled_end) + { + T partial_min[unroll_block]; + for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++) + partial_min[unroll_it] = ret; + + while (i < unrolled_end) + { + for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++) + { + if (add_all_elements || !condition_map[i + unroll_it] == add_if_cond_zero) + partial_min[unroll_it] = ComparatorClass::cmp(partial_min[unroll_it], ptr[i + unroll_it]); + } + i += unroll_block; + } + for (size_t unroll_it = 0; unroll_it < unroll_block; unroll_it++) + ret = ComparatorClass::cmp(ret, partial_min[unroll_it]); + } + } + + for (; i < count; i++) + { + if (add_all_elements || !condition_map[i] == add_if_cond_zero) + ret = ComparatorClass::cmp(ret, ptr[i]); + } + + return ret; + } +)) + + +/// Given a vector of T finds the extreme (MIN or MAX) value +template +static std::optional +findNumericExtreme(const T * __restrict ptr, const UInt8 * __restrict condition_map [[maybe_unused]], size_t start, size_t end) +{ +#if USE_MULTITARGET_CODE + /// We see no benefit from using AVX512BW or AVX512F (over AVX2), so we only declare SSE and AVX2 + if (isArchSupported(TargetArch::AVX2)) + return findNumericExtremeImplAVX2(ptr, condition_map, start, end); + + if (isArchSupported(TargetArch::SSE42)) + return 
findNumericExtremeImplSSE42(ptr, condition_map, start, end); +#endif + return findNumericExtremeImpl(ptr, condition_map, start, end); +} + +template +std::optional findNumericMin(const T * __restrict ptr, size_t start, size_t end) +{ + return findNumericExtreme, true, false>(ptr, nullptr, start, end); +} + +template +std::optional findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) +{ + return findNumericExtreme, false, true>(ptr, condition_map, start, end); +} + +template +std::optional findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) +{ + return findNumericExtreme, false, false>(ptr, condition_map, start, end); +} + +template +std::optional findNumericMax(const T * __restrict ptr, size_t start, size_t end) +{ + return findNumericExtreme, true, false>(ptr, nullptr, start, end); +} + +template +std::optional findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) +{ + return findNumericExtreme, false, true>(ptr, condition_map, start, end); +} + +template +std::optional findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) +{ + return findNumericExtreme, false, false>(ptr, condition_map, start, end); +} + + +#define EXTERN_INSTANTIATION(T) \ + extern template std::optional findNumericMin(const T * __restrict ptr, size_t start, size_t end); \ + extern template std::optional findNumericMinNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \ + extern template std::optional findNumericMinIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \ + extern template std::optional findNumericMax(const T * __restrict ptr, size_t start, size_t end); \ + extern template std::optional findNumericMaxNotNull(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); \ + extern template std::optional findNumericMaxIf(const T * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end); + + FOR_BASIC_NUMERIC_TYPES(EXTERN_INSTANTIATION) +#undef EXTERN_INSTANTIATION + +} diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index fd6a57090b8..4ee29d3fc55 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -348,6 +348,25 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE( #if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) +/// NOLINTNEXTLINE +#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ + FUNCTION_HEADER \ + \ + AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \ + name##AVX2 \ + FUNCTION_BODY \ + \ + FUNCTION_HEADER \ + \ + SSE42_FUNCTION_SPECIFIC_ATTRIBUTE \ + name##SSE42 \ + FUNCTION_BODY \ + \ + FUNCTION_HEADER \ + \ + name \ + FUNCTION_BODY \ + /// NOLINTNEXTLINE #define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ FUNCTION_HEADER \ @@ -381,6 +400,14 @@ DECLARE_AVX512VBMI2_SPECIFIC_CODE( #else + /// NOLINTNEXTLINE +#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ + FUNCTION_HEADER \ + \ + name \ + FUNCTION_BODY \ + + /// NOLINTNEXTLINE #define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \ FUNCTION_HEADER \ diff --git a/tests/queries/0_stateless/02406_minmax_behaviour.reference b/tests/queries/0_stateless/02406_minmax_behaviour.reference new file 
mode 100644 index 00000000000..d52ba640a0e --- /dev/null +++ b/tests/queries/0_stateless/02406_minmax_behaviour.reference @@ -0,0 +1,192 @@ +-- { echoOn } +SET compile_aggregate_expressions=0; +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + min(data), + min(data2), + min(data3), + min(data4), + min(data5); +1 nan 1 nan nan +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + max(data), + max(data2), + max(data3), + max(data4), + max(data5); +5 nan 4 nan nan +Select max(number) from numbers(100) settings max_threads=1, max_block_size=10; +99 +Select max(-number) from numbers(100); +0 +Select min(number) from numbers(100) settings max_threads=1, max_block_size=10; +0 +Select min(-number) from numbers(100); +-99 +SELECT minIf(number, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; +0 +SELECT maxIf(number, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; +0 +SELECT minIf(number::Float64, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; +0 +SELECT maxIf(number::Float64, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; +0 +SELECT minIf(number::String, number < 10) as number from numbers(10, 1000); + +SELECT maxIf(number::String, number < 10) as number from numbers(10, 1000); + +SELECT maxIf(number::String, number % 3), maxIf(number::String, number % 5), minIf(number::String, number % 3), minIf(number::String, number > 10) from numbers(400); +98 99 1 100 +SELECT minIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); +\N +SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); +\N +SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +22 +SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +26 +SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMax(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMax(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMax(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10009 +SELECT argMax(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10009 +SELECT argMaxIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10009 +SELECT argMaxIf(number, 
now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10009 +SELECT argMaxIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +0 +SELECT argMaxIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +0 +SELECT argMax(number, number::Float64) from numbers(2029); +2028 +SELECT argMaxIf(number, number::Float64, number > 2030) from numbers(2029); +0 +SELECT argMaxIf(number, number::Float64, number > 2030) from numbers(2032); +2031 +SELECT argMax(number, -number::Float64) from numbers(2029); +0 +SELECT argMaxIf(number, -number::Float64, number > 2030) from numbers(2029); +0 +SELECT argMaxIf(number, -number::Float64, number > 2030) from numbers(2032); +2031 +SELECT argMin(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMin(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMin(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMin(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMin(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMin(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMin(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMin(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMinIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +10 +SELECT argMinIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +10 +SELECT argMinIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +0 +SELECT argMinIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +0 +SELECT argMin(number, number::Float64) from numbers(2029); +0 +SELECT argMinIf(number, number::Float64, number > 2030) from numbers(2029); +0 +SELECT argMinIf(number, number::Float64, number > 2030) from numbers(2032); +2031 +SELECT argMin(number, -number::Float64) from numbers(2029); +2028 +SELECT argMinIf(number, -number::Float64, number > 2030) from numbers(2029); +0 +SELECT argMinIf(number, -number::Float64, number > 2030) from numbers(2032); +2031 +Select argMax((n, n), n) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +(8,8) Tuple(Nullable(UInt64), Nullable(UInt64)) +Select argMaxIf((n, n), n, n < 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +(4,4) Tuple(Nullable(UInt64), Nullable(UInt64)) +Select argMaxIf((n, n), n, n > 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, 
number) as n from numbers(10)); +(8,8) Tuple(Nullable(UInt64), Nullable(UInt64)) +Select argMin((n, n), n) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +(1,1) Tuple(Nullable(UInt64), Nullable(UInt64)) +Select argMinIf((n, n), n, n < 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +(1,1) Tuple(Nullable(UInt64), Nullable(UInt64)) +Select argMinIf((n, n), n, n > 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +(7,7) Tuple(Nullable(UInt64), Nullable(UInt64)) +SET compile_aggregate_expressions=1; +SET min_count_to_compile_aggregate_expression=0; +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + min(data), + min(data2), + min(data3), + min(data4), + min(data5); +1 nan 1 nan nan +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + max(data), + max(data2), + max(data3), + max(data4), + max(data5); +5 nan 4 nan nan +SELECT minIf(number, rand() % 2 == 3) from numbers(10); +0 +SELECT maxIf(number, rand() % 2 == 3) from numbers(10); +0 +SELECT minIf(number::Float64, rand() % 2 == 3) from numbers(10); +0 +SELECT maxIf(number::Float64, rand() % 2 == 3) from numbers(10); +0 +SELECT minIf(number::String, number < 10) as number from numbers(10, 1000); + +SELECT maxIf(number::String, number < 10) as number from numbers(10, 1000); + +SELECT maxIf(number::String, number % 3), maxIf(number::String, number % 5), minIf(number::String, number % 3), minIf(number::String, number > 10) from numbers(400); +98 99 1 100 +SELECT minIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); +\N +SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); +\N +SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +22 +SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +26 diff --git a/tests/queries/0_stateless/02406_minmax_behaviour.sql b/tests/queries/0_stateless/02406_minmax_behaviour.sql new file mode 100644 index 00000000000..a3afe7d40b0 --- /dev/null +++ b/tests/queries/0_stateless/02406_minmax_behaviour.sql @@ -0,0 +1,140 @@ +-- { echoOn } +SET compile_aggregate_expressions=0; + +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + min(data), + min(data2), + min(data3), + min(data4), + min(data5); + +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + max(data), + max(data2), + max(data3), + max(data4), + max(data5); + +Select max(number) from numbers(100) settings max_threads=1, max_block_size=10; +Select max(-number) from numbers(100); +Select min(number) from numbers(100) settings max_threads=1, max_block_size=10; +Select min(-number) from numbers(100); + +SELECT minIf(number, rand() % 2 == 3) from numbers(10) settings max_threads=1, 
max_block_size=5; +SELECT maxIf(number, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; + +SELECT minIf(number::Float64, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; +SELECT maxIf(number::Float64, rand() % 2 == 3) from numbers(10) settings max_threads=1, max_block_size=5; + +SELECT minIf(number::String, number < 10) as number from numbers(10, 1000); +SELECT maxIf(number::String, number < 10) as number from numbers(10, 1000); +SELECT maxIf(number::String, number % 3), maxIf(number::String, number % 5), minIf(number::String, number % 3), minIf(number::String, number > 10) from numbers(400); + +SELECT minIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); +SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1000); + +SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); +SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20)); + +SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMax(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMax(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMax(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMax(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMax(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMax(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMaxIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMaxIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMaxIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMaxIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMax(number, number::Float64) from numbers(2029); +SELECT argMaxIf(number, number::Float64, number > 2030) from numbers(2029); +SELECT argMaxIf(number, number::Float64, number > 2030) from numbers(2032); +SELECT argMax(number, -number::Float64) from numbers(2029); +SELECT argMaxIf(number, -number::Float64, number > 2030) from numbers(2029); +SELECT argMaxIf(number, -number::Float64, number > 2030) from numbers(2032); + +SELECT argMin(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMin(number, now()) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMin(number, 1) FROM (Select number as number from numbers(10, 
10000)) settings max_threads=1, max_block_size=100; +SELECT argMin(number, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMin(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMin(number::String, 1) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMin(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMin(number, now() + number) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMinIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMinIf(number, now() + number, number % 10 < 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMinIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=100; +SELECT argMinIf(number, now() + number, number % 10 > 20) FROM (Select number as number from numbers(10, 10000)) settings max_threads=1, max_block_size=20000; +SELECT argMin(number, number::Float64) from numbers(2029); +SELECT argMinIf(number, number::Float64, number > 2030) from numbers(2029); +SELECT argMinIf(number, number::Float64, number > 2030) from numbers(2032); +SELECT argMin(number, -number::Float64) from numbers(2029); +SELECT argMinIf(number, -number::Float64, number > 2030) from numbers(2029); +SELECT argMinIf(number, -number::Float64, number > 2030) from numbers(2032); + +Select argMax((n, n), n) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +Select argMaxIf((n, n), n, n < 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +Select argMaxIf((n, n), n, n > 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); + +Select argMin((n, n), n) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +Select argMinIf((n, n), n, n < 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); +Select argMinIf((n, n), n, n > 5) t, toTypeName(t) FROM (Select if(number % 3 == 0, NULL, number) as n from numbers(10)); + +SET compile_aggregate_expressions=1; +SET min_count_to_compile_aggregate_expression=0; + +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + min(data), + min(data2), + min(data3), + min(data4), + min(data5); + +WITH + arrayJoin([1, 2, 3, nan, 4, 5]) AS data, + arrayJoin([nan, 1, 2, 3, 4]) AS data2, + arrayJoin([1, 2, 3, 4, nan]) AS data3, + arrayJoin([nan, nan, nan]) AS data4, + arrayJoin([nan, 1, 2, 3, nan]) AS data5 +SELECT + max(data), + max(data2), + max(data3), + max(data4), + max(data5); + +SELECT minIf(number, rand() % 2 == 3) from numbers(10); +SELECT maxIf(number, rand() % 2 == 3) from numbers(10); + +SELECT minIf(number::Float64, rand() % 2 == 3) from numbers(10); +SELECT maxIf(number::Float64, rand() % 2 == 3) from numbers(10); + +SELECT minIf(number::String, number < 10) as number from numbers(10, 1000); +SELECT maxIf(number::String, number 
< 10) as number from numbers(10, 1000);
+SELECT maxIf(number::String, number % 3), maxIf(number::String, number % 5), minIf(number::String, number % 3), minIf(number::String, number > 10) from numbers(400);
+
+SELECT minIf(number::Nullable(String), number < 10) as number from numbers(10, 1000);
+SELECT maxIf(number::Nullable(String), number < 10) as number from numbers(10, 1000);
+
+SELECT min(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20));
+SELECT max(n::Nullable(String)) from (Select if(number < 15 and number % 2 == 1, number * 2, NULL) as n from numbers(10, 20));

From 3dcc1f5448b65f3228690e3992b82f9e15132a52 Mon Sep 17 00:00:00 2001
From: Pengyuan Bian
Date: Wed, 27 Dec 2023 07:54:18 +0000
Subject: [PATCH 20/88] update.

---
 .../configs/config.xml |  9 +++
 .../test.py            | 58 +++++++++++--------
 2 files changed, 42 insertions(+), 25 deletions(-)

diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml
index 1a62adf2d6e..106edfc8351 100644
--- a/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml
+++ b/tests/integration/test_attach_table_from_s3_plain_readonly/configs/config.xml
@@ -9,5 +9,14 @@
                 true
             </disk_s3_plain_readonly>
         </disks>
+        <policies>
+            <s3_plain_readonly>
+                <volumes>
+                    <main>
+                        <disk>disk_s3_plain_readonly</disk>
+                    </main>
+                </volumes>
+            </s3_plain_readonly>
diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/test.py b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py index 724c87effe1..52ae74e02f8 100644 --- a/tests/integration/test_attach_table_from_s3_plain_readonly/test.py +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py @@ -2,6 +2,7 @@ import re import os import logging import pytest +import json from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry @@ -10,8 +11,8 @@ from pathlib import Path cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", +node1 = cluster.add_instance( + "node1", main_configs=["configs/config.xml"], user_configs=["configs/settings.xml"], with_zookeeper=True, @@ -20,6 +21,16 @@ node = cluster.add_instance( macros={"shard": 1, "replica": 1}, ) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/config.xml"], + user_configs=["configs/settings.xml"], + with_zookeeper=True, + with_minio=True, + stay_alive=True, + macros={"shard": 1, "replica": 2}, +) + uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") def upload_to_minio(minio_client, bucket_name, local_path, minio_path=''): @@ -35,7 +46,7 @@ def upload_to_minio(minio_client, bucket_name, local_path, minio_path=''): minio_client.put_object(bucket_name, minio_object_name, data, file_stat.st_size) logging.info(f'Uploaded {local_file_path} to {minio_object_name}') except S3Error as e: - logging.info(f'Error uploading {local_file_path}: {e}') + logging.error(f'Error uploading {local_file_path}: {e}') @pytest.fixture(scope="module") @@ -49,7 +60,8 @@ def started_cluster(): def test_attach_table_from_s3_plain_readonly(started_cluster): - node.query( + # Create an atomic DB with mergetree sample data + node1.query( """ create database local_db; @@ -59,35 +71,31 @@ def test_attach_table_from_s3_plain_readonly(started_cluster): """ ) - assert int(node.query("select num from local_db.test_table limit 1")) == 5 + assert int(node1.query("select num from local_db.test_table limit 1")) == 5 - # Copy local file into minio bucket - table_data_path = os.path.join(node.path, f"database/store") + # Copy local MergeTree data into minio bucket + table_data_path = os.path.join(node1.path, f"database/store") minio = cluster.minio_client - upload_to_minio(minio, cluster.minio_bucket, table_data_path, "data/disks/disk_s3_plain/") + upload_to_minio(minio, cluster.minio_bucket, table_data_path, "data/disks/disk_s3_plain/store/") - ### remove - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for s3_object in s3_objects: - logging.info("bianpengyuan Existing S3 object: %s", s3_object.object_name) - ### remove + # Drop the non-replicated table, we don't need it anymore + table_uuid = node1.query("SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'").strip() + node1.query("drop table local_db.test_table SYNC;") - # Create a replicated database, and attach the merge tree data disk - table_uuid = node.query("SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'").strip() - node.query( + # Create a replicated database + node1.query("create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica1');") + node2.query("create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica2');") + + # Create a MergeTree table at one node, by attaching the merge tree data + 
node1.query( f""" - drop table local_db.test_table SYNC; - - create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica1'); attach table s3_plain_test_db.test_table UUID '{table_uuid}' (num UInt32) engine=MergeTree() order by num - settings - disk=disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/', - access_key_id='minio', - secret_access_key='minio123'); + settings storage_policy = 's3_plain_readonly' """ ) - assert int(node.query("select num from s3_plain_test_db.test_table limit 1")) == 5 + # Check that both nodes can query and get result. + assert int(node1.query("select num from s3_plain_test_db.test_table limit 1")) == 5 + assert int(node2.query("select num from s3_plain_test_db.test_table limit 1")) == 5 From 68787cedd662a12e8e937ad5f93d022473b7cac1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 27 Dec 2023 08:51:17 +0000 Subject: [PATCH 21/88] Linter warnings --- src/AggregateFunctions/AggregateFunctionMax.cpp | 8 ++++++-- src/AggregateFunctions/AggregateFunctionMin.cpp | 4 ++++ src/AggregateFunctions/AggregateFunctionMinMaxAny.h | 7 +++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionMax.cpp b/src/AggregateFunctions/AggregateFunctionMax.cpp index 069e80898d7..e74224a24c3 100644 --- a/src/AggregateFunctions/AggregateFunctionMax.cpp +++ b/src/AggregateFunctions/AggregateFunctionMax.cpp @@ -37,6 +37,7 @@ public: ssize_t if_argument_pos) const override; }; +// NOLINTBEGIN(bugprone-macro-parentheses) #define SPECIALIZE(TYPE) \ template <> \ void AggregateFunctionsSingleValueMax>>::addBatchSinglePlace( \ @@ -59,8 +60,9 @@ void AggregateFunctionsSingleValueMaxdata(place).changeIfGreater(opt.value()); \ } +// NOLINTEND(bugprone-macro-parentheses) - FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) #undef SPECIALIZE template @@ -75,6 +77,7 @@ void AggregateFunctionsSingleValueMax::addBatchSinglePlace( return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); } +// NOLINTBEGIN(bugprone-macro-parentheses) #define SPECIALIZE(TYPE) \ template <> \ void AggregateFunctionsSingleValueMax>>::addBatchSinglePlaceNotNull( \ @@ -101,8 +104,9 @@ void AggregateFunctionsSingleValueMaxdata(place).changeIfGreater(opt.value()); \ } +// NOLINTEND(bugprone-macro-parentheses) - FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) +FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) #undef SPECIALIZE template diff --git a/src/AggregateFunctions/AggregateFunctionMin.cpp b/src/AggregateFunctions/AggregateFunctionMin.cpp index 2312d639363..48758aa74b0 100644 --- a/src/AggregateFunctions/AggregateFunctionMin.cpp +++ b/src/AggregateFunctions/AggregateFunctionMin.cpp @@ -38,6 +38,7 @@ public: ssize_t if_argument_pos) const override; }; +// NOLINTBEGIN(bugprone-macro-parentheses) #define SPECIALIZE(TYPE) \ template <> \ void AggregateFunctionsSingleValueMin>>::addBatchSinglePlace( \ @@ -60,6 +61,7 @@ public: if (opt.has_value()) \ this->data(place).changeIfLess(opt.value()); \ } +// NOLINTEND(bugprone-macro-parentheses) FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) #undef SPECIALIZE @@ -76,6 +78,7 @@ void AggregateFunctionsSingleValueMin::addBatchSinglePlace( return Parent::addBatchSinglePlace(row_begin, row_end, place, columns, arena, if_argument_pos); } +// NOLINTBEGIN(bugprone-macro-parentheses) #define SPECIALIZE(TYPE) \ template <> \ void AggregateFunctionsSingleValueMin>>::addBatchSinglePlaceNotNull( \ @@ -102,6 +105,7 @@ void 
AggregateFunctionsSingleValueMin::addBatchSinglePlace( if (opt.has_value()) \ this->data(place).changeIfLess(opt.value()); \ } +// NOLINTEND(bugprone-macro-parentheses) FOR_BASIC_NUMERIC_TYPES(SPECIALIZE) #undef SPECIALIZE diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 0457b3d7d6f..b69a0b100a3 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -1245,8 +1245,11 @@ public: || StringRef(Data::name()) == StringRef("max")) { if (!type->isComparable()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of aggregate function {} " - "because the values of that data type are not comparable", type->getName(), getName()); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of aggregate function {} because the values of that data type are not comparable", + type->getName(), + Data::name()); } } From eeadeaa89d2e7099a1e373be8b465fba9c3e7f59 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 13:03:38 +0100 Subject: [PATCH 22/88] Revert "Merge pull request #58104 from ClickHouse/cleanup-replication-compatibility" This reverts commit 34fd555ee6c9bb03490c1f3a04b4fe707fb4083e, reversing changes made to cb53ee63bee55f3795e5dc93e3a92ef025d8c46f. --- src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index fc924d1f80c..85f99e3f8c3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -269,12 +269,6 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor deduplicate_by_columns = std::move(new_deduplicate_by_columns); } - else if (checkString("cleanup: ", in)) - { - /// Obsolete option, does nothing. - bool cleanup = false; - in >> cleanup; - } else trailing_newline_found = true; } From 4c4623e3ab89e7fa72d1825cad98376da9da1b88 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 13:03:53 +0100 Subject: [PATCH 23/88] Revert "Merge pull request #58100 from ClickHouse/optimize-cleanup-as-noop" This reverts commit 3b1aa53460ff77e56520df476ec7a3b48aec3e61, reversing changes made to b9466023ce23cde29db2e8c4e41725873c080036. 
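With this pair of reverts applied (this commit restores the real `CLEANUP` parsing path, and the following one restores the implementation behind it), `OPTIMIZE ... FINAL CLEANUP` goes back to physically removing rows flagged by `is_deleted` instead of being parsed as a no-op. A minimal sketch of the restored behaviour, assuming a build with these reverts; the table is illustrative and mirrors the `02948` test that this commit deletes:

```sql
-- Illustrative only: CLEANUP requires ReplacingMergeTree with both ver and is_deleted.
CREATE TABLE t (x UInt8, version UInt8, deleted UInt8, PRIMARY KEY x)
ENGINE = ReplacingMergeTree(version, deleted);

INSERT INTO t VALUES (1, 1, 0), (2, 1, 1);

-- After the revert this is a real operation again, not a parsed-and-ignored keyword:
OPTIMIZE TABLE t FINAL CLEANUP;

SELECT x FROM t;  -- expected to return only x = 1
```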
--- src/Parsers/ParserOptimizeQuery.cpp | 4 ---- .../0_stateless/02948_optimize_cleanup_as_noop.reference | 0 .../queries/0_stateless/02948_optimize_cleanup_as_noop.sql | 7 ------- 3 files changed, 11 deletions(-) delete mode 100644 tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference delete mode 100644 tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql diff --git a/src/Parsers/ParserOptimizeQuery.cpp b/src/Parsers/ParserOptimizeQuery.cpp index 5d3b196caf8..826fbf38b36 100644 --- a/src/Parsers/ParserOptimizeQuery.cpp +++ b/src/Parsers/ParserOptimizeQuery.cpp @@ -28,7 +28,6 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ParserKeyword s_partition("PARTITION"); ParserKeyword s_final("FINAL"); ParserKeyword s_deduplicate("DEDUPLICATE"); - ParserKeyword s_cleanup("CLEANUP"); ParserKeyword s_by("BY"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p(true); @@ -77,9 +76,6 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return false; } - /// Obsolete feature, ignored for backward compatibility. - s_cleanup.ignore(pos, expected); - auto query = std::make_shared(); node = query; diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql b/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql deleted file mode 100644 index 002d696e62f..00000000000 --- a/tests/queries/0_stateless/02948_optimize_cleanup_as_noop.sql +++ /dev/null @@ -1,7 +0,0 @@ -# There was a wrong, harmful feature, leading to bugs and data corruption. -# This feature is removed, but we take care to maintain compatibility on the syntax level, so now it works as a no-op. - -DROP TABLE IF EXISTS t; -CREATE TABLE t (x UInt8, PRIMARY KEY x) ENGINE = ReplacingMergeTree; -OPTIMIZE TABLE t CLEANUP; -DROP TABLE t; From 01d042c490e31993bb8cc044892fbf2a2eedbec3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 13:46:06 +0100 Subject: [PATCH 24/88] Revert "Merge pull request #57932 from ClickHouse/remove-shit-cleanup" This reverts commit 2d58dc512cf2a529060716d38086069f149d62df, reversing changes made to 41873dc4a32f6db7f7ab347568d0c1dc0adab335. 
--- .../mergetree-family/replacingmergetree.md | 49 +++++- .../settings/merge-tree-settings.md | 10 ++ .../mergetree-family/replacingmergetree.md | 53 ++++++ .../server/config.d/graphite_alternative.xml | 1 - src/Core/SettingsEnums.cpp | 2 + src/Core/SettingsEnums.h | 8 + src/Interpreters/Context.cpp | 3 + src/Interpreters/InterpreterOptimizeQuery.cpp | 2 +- src/Parsers/ASTOptimizeQuery.cpp | 3 + src/Parsers/ASTOptimizeQuery.h | 4 +- src/Parsers/ParserOptimizeQuery.cpp | 6 + .../Algorithms/ReplacingSortedAlgorithm.cpp | 54 +++++- .../Algorithms/ReplacingSortedAlgorithm.h | 8 +- .../Merges/ReplacingSortedTransform.h | 11 +- .../QueryPlan/ReadFromMergeTree.cpp | 7 +- src/Storages/IStorage.h | 1 + .../MergeTree/MergeFromLogEntryTask.cpp | 1 + .../MergeTree/MergePlainMergeTreeTask.cpp | 1 + .../MergeTree/MergePlainMergeTreeTask.h | 3 + src/Storages/MergeTree/MergeTask.cpp | 16 +- src/Storages/MergeTree/MergeTask.h | 4 + src/Storages/MergeTree/MergeTreeData.cpp | 40 +++++ src/Storages/MergeTree/MergeTreeData.h | 3 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 + .../MergeTree/MergeTreeDataMergerMutator.h | 1 + .../MergeTree/MergeTreeDataWriter.cpp | 2 +- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 1 + .../MergeTree/ReplicatedMergeTreeLogEntry.cpp | 5 + .../MergeTree/ReplicatedMergeTreeLogEntry.h | 1 + .../ReplicatedMergeTreeTableMetadata.cpp | 10 ++ .../ReplicatedMergeTreeTableMetadata.h | 1 + .../MergeTree/registerStorageMergeTree.cpp | 11 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 4 + src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 1 + src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageBuffer.h | 1 + src/Storages/StorageMaterializedView.cpp | 3 +- src/Storages/StorageMaterializedView.h | 1 + src/Storages/StorageMergeTree.cpp | 15 +- src/Storages/StorageMergeTree.h | 16 +- src/Storages/StorageProxy.h | 17 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 ++ src/Storages/StorageReplicatedMergeTree.h | 2 + src/Storages/WindowView/StorageWindowView.cpp | 3 +- src/Storages/WindowView/StorageWindowView.h | 1 + ...lacing_merge_tree_vertical_merge.reference | 2 - ...77_replacing_merge_tree_vertical_merge.sql | 8 +- ...ing_merge_tree_is_deleted_column.reference | 99 +++++++++++ ...replacing_merge_tree_is_deleted_column.sql | 160 ++++++++++++++++++ ...select_final_on_single_partition.reference | 31 ++++ ...e_fix_select_final_on_single_partition.sql | 32 ++++ ...eplacing_merge_tree_with_cleanup.reference | 13 ++ ...2861_replacing_merge_tree_with_cleanup.sql | 23 +++ ...cated_merge_parameters_must_consistent.sql | 20 +++ 55 files changed, 754 insertions(+), 49 deletions(-) delete mode 120000 programs/server/config.d/graphite_alternative.xml create mode 100644 tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference create mode 100644 tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql create mode 100644 tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference create mode 100644 tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql create mode 100644 tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference create mode 100644 tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md index 9467da33398..6de818c130f 100644 --- 
a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md @@ -25,7 +25,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] [ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[SETTINGS name=value, ...] +[SETTINGS name=value, clean_deleted_rows=value, ...] ``` For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md). @@ -88,6 +88,53 @@ SELECT * FROM mySecondReplacingMT FINAL; └─────┴─────────┴─────────────────────┘ ``` +### is_deleted + +`is_deleted` — Name of a column used during a merge to determine whether the data in this row represents the state or is to be deleted; `1` is a “deleted“ row, `0` is a “state“ row. + + Column data type — `UInt8`. + +:::note +`is_deleted` can only be enabled when `ver` is used. + +The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`. + +No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept. + +::: + +Example: +```sql +-- with ver and is_deleted +CREATE OR REPLACE TABLE myThirdReplacingMT +( + `key` Int64, + `someCol` String, + `eventTime` DateTime, + `is_deleted` UInt8 +) +ENGINE = ReplacingMergeTree(eventTime, is_deleted) +ORDER BY key; + +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); + +select * from myThirdReplacingMT final; + +0 rows in set. Elapsed: 0.003 sec. + +-- delete rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; + +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); + +select * from myThirdReplacingMT final; + +┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ +│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +└─────┴─────────┴─────────────────────┴────────────┘ +``` + ## Query clauses When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index da049554c67..c7e461d15ae 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -852,6 +852,16 @@ If the file name for column is too long (more than `max_file_name_length` bytes) The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127. +## clean_deleted_rows + +Enable/disable automatic deletion of rows flagged as `is_deleted` when perform `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. + +Possible values: + +- `Always` or `Never`. + +Default value: `Never` + ## allow_experimental_block_number_column Persists virtual column `_block_number` on merges. 
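Taken together, the two documentation hunks above describe one mechanism: `clean_deleted_rows = 'Always'` makes merges behave as if `CLEANUP` had been passed, so a plain `OPTIMIZE ... FINAL` already purges flagged rows. A minimal sketch under that assumption; the table and column names below are illustrative, not part of the patch:

```sql
-- With the table-level MergeTree setting clean_deleted_rows = 'Always',
-- no CLEANUP keyword is needed to drop rows whose is_deleted flag is 1.
CREATE TABLE demo
(
    key UInt64,
    value String,
    version UInt64,
    is_deleted UInt8
)
ENGINE = ReplacingMergeTree(version, is_deleted)
ORDER BY key
SETTINGS clean_deleted_rows = 'Always';

INSERT INTO demo VALUES (1, 'keep', 1, 0), (2, 'purge', 1, 1);

OPTIMIZE TABLE demo FINAL;

SELECT * FROM demo;  -- expected: only the key = 1 row survives
```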
diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index c17e7982b98..e8089b2c42b 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -86,6 +86,59 @@ SELECT * FROM mySecondReplacingMT FINAL; │ 1 │ first │ 2020-01-01 01:01:01 │ └─────┴─────────┴─────────────────────┘ ``` +### is_deleted + +`is_deleted` — Имя столбца, который используется во время слияния для обозначения того, нужно ли отображать строку или она подлежит удалению; `1` - для удаления строки, `0` - для отображения строки. + + Тип данных столбца — `UInt8`. + +:::note +`is_deleted` может быть использован, если `ver` используется. + +Строка удаляется в следующих случаях: + + - при использовании инструкции `OPTIMIZE ... FINAL CLEANUP` + - при использовании инструкции `OPTIMIZE ... FINAL` + - параметр движка `clean_deleted_rows` установлен в значение `Always` (по умолчанию - `Never`) + - есть новые версии строки + +Не рекомендуется выполнять `FINAL CLEANUP` или использовать параметр движка `clean_deleted_rows` со значением `Always`, это может привести к неожиданным результатам, например удаленные строки могут вновь появиться. + +Вне зависимости от производимых изменений над данными, версия должна увеличиваться. Если у двух строк одна и та же версия, то остается только последняя вставленная строка. +::: + +Пример: + +```sql +-- with ver and is_deleted +CREATE OR REPLACE TABLE myThirdReplacingMT +( + `key` Int64, + `someCol` String, + `eventTime` DateTime, + `is_deleted` UInt8 +) +ENGINE = ReplacingMergeTree(eventTime, is_deleted) +ORDER BY key; + +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0); +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1); + +select * from myThirdReplacingMT final; + +0 rows in set. Elapsed: 0.003 sec. 
+ +-- delete rows with is_deleted +OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP; + +INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0); + +select * from myThirdReplacingMT final; + +┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐ +│ 1 │ first │ 2020-01-01 00:00:00 │ 0 │ +└─────┴─────────┴─────────────────────┴────────────┘ +``` ## Секции запроса diff --git a/programs/server/config.d/graphite_alternative.xml b/programs/server/config.d/graphite_alternative.xml deleted file mode 120000 index 400b9e75f1f..00000000000 --- a/programs/server/config.d/graphite_alternative.xml +++ /dev/null @@ -1 +0,0 @@ -../../../tests/config/config.d/graphite_alternative.xml \ No newline at end of file diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index c35e69977ed..ee113a6776f 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -98,6 +98,8 @@ IMPLEMENT_SETTING_AUTO_ENUM(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUMENTS) IMPLEMENT_SETTING_AUTO_ENUM(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS) +IMPLEMENT_SETTING_AUTO_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS) + IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, {"datetime64", MySQLDataTypesSupport::DATETIME64}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 2e71c96b954..7977a0b3ab6 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -140,6 +140,14 @@ enum class DefaultTableEngine DECLARE_SETTING_ENUM(DefaultTableEngine) +enum class CleanDeletedRows +{ + Never = 0, /// Disable. + Always, +}; + +DECLARE_SETTING_ENUM(CleanDeletedRows) + enum class MySQLDataTypesSupport { DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 25146ebc10d..e376ab5f0bf 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index 6be78deb897..ae456e8b31d 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -79,7 +79,7 @@ BlockIO InterpreterOptimizeQuery::execute() if (auto * snapshot_data = dynamic_cast(storage_snapshot->data.get())) snapshot_data->parts = {}; - table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, getContext()); + table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, ast.cleanup, getContext()); return {}; } diff --git a/src/Parsers/ASTOptimizeQuery.cpp b/src/Parsers/ASTOptimizeQuery.cpp index 720c7699fb6..173310f7930 100644 --- a/src/Parsers/ASTOptimizeQuery.cpp +++ b/src/Parsers/ASTOptimizeQuery.cpp @@ -24,6 +24,9 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt if (deduplicate) settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : ""); + if (cleanup) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " CLEANUP" << (settings.hilite ? 
hilite_none : ""); + if (deduplicate_by_columns) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTOptimizeQuery.h b/src/Parsers/ASTOptimizeQuery.h index 584b2f38fe6..4c914c11912 100644 --- a/src/Parsers/ASTOptimizeQuery.h +++ b/src/Parsers/ASTOptimizeQuery.h @@ -21,10 +21,12 @@ public: bool deduplicate = false; /// Deduplicate by columns. ASTPtr deduplicate_by_columns; + /// Delete 'is_deleted' data + bool cleanup = false; /** Get the text that identifies this element. */ String getID(char delim) const override { - return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : ""); + return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "")+ (cleanup ? "_cleanup" : ""); } ASTPtr clone() const override diff --git a/src/Parsers/ParserOptimizeQuery.cpp b/src/Parsers/ParserOptimizeQuery.cpp index 826fbf38b36..e887ff445d2 100644 --- a/src/Parsers/ParserOptimizeQuery.cpp +++ b/src/Parsers/ParserOptimizeQuery.cpp @@ -28,6 +28,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ParserKeyword s_partition("PARTITION"); ParserKeyword s_final("FINAL"); ParserKeyword s_deduplicate("DEDUPLICATE"); + ParserKeyword s_cleanup("CLEANUP"); ParserKeyword s_by("BY"); ParserToken s_dot(TokenType::Dot); ParserIdentifier name_p(true); @@ -38,6 +39,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte ASTPtr partition; bool final = false; bool deduplicate = false; + bool cleanup = false; String cluster_str; if (!s_optimize_table.ignore(pos, expected)) @@ -68,6 +70,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte if (s_deduplicate.ignore(pos, expected)) deduplicate = true; + if (s_cleanup.ignore(pos, expected)) + cleanup = true; + ASTPtr deduplicate_by_columns; if (deduplicate && s_by.ignore(pos, expected)) { @@ -85,6 +90,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte query->final = final; query->deduplicate = deduplicate; query->deduplicate_by_columns = deduplicate_by_columns; + query->cleanup = cleanup; query->database = database; query->table = table; diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 139ccd815d2..0c0598171b3 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -3,22 +3,33 @@ #include #include - namespace DB { +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; +} + ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( const Block & header_, size_t num_inputs, SortDescription description_, + const String & is_deleted_column, const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_, - bool use_average_block_sizes) + bool use_average_block_sizes, + bool cleanup_, + size_t * cleanedup_rows_count_) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes) + , cleanup(cleanup_) + , cleanedup_rows_count(cleanedup_rows_count_) { + if (!is_deleted_column.empty()) + is_deleted_column_number = 
header_.getPositionByName(is_deleted_column); if (!version_column.empty()) version_column_number = header_.getPositionByName(version_column); } @@ -65,7 +76,21 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// Write the data for the previous primary key. if (!selected_row.empty()) - insertRow(); + { + if (is_deleted_column_number != -1) + { + uint8_t value = assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; + if (!cleanup || !value) + insertRow(); + else if (cleanup && cleanedup_rows_count != nullptr) + { + *cleanedup_rows_count += current_row_sources.size(); + current_row_sources.resize(0); + } + } + else + insertRow(); + } selected_row.clear(); } @@ -75,6 +100,13 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (out_row_sources_buf) current_row_sources.emplace_back(current.impl->order, true); + if (is_deleted_column_number != -1) + { + const UInt8 is_deleted = assert_cast(*current->all_columns[is_deleted_column_number]).getData()[current->getRow()]; + if ((is_deleted != 1) && (is_deleted != 0)) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect data: is_deleted = {} (must be 1 or 0).", toString(is_deleted)); + } + /// A non-strict comparison, since we select the last row for the same version values. if (version_column_number == -1 || selected_row.empty() @@ -105,7 +137,21 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// We will write the data for the last primary key. if (!selected_row.empty()) - insertRow(); + { + if (is_deleted_column_number != -1) + { + uint8_t value = assert_cast(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; + if (!cleanup || !value) + insertRow(); + else if (cleanup && cleanedup_rows_count != nullptr) + { + *cleanedup_rows_count += current_row_sources.size(); + current_row_sources.resize(0); + } + } + else + insertRow(); + } return Status(merged_data.pull(), true); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 2295d1c35d1..b2258918fde 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -21,11 +21,14 @@ public: ReplacingSortedAlgorithm( const Block & header, size_t num_inputs, SortDescription description_, + const String & is_deleted_column, const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false); + bool use_average_block_sizes = false, + bool cleanup = false, + size_t * cleanedup_rows_count = nullptr); const char * getName() const override { return "ReplacingSortedAlgorithm"; } Status merge() override; @@ -33,7 +36,10 @@ public: private: MergedData merged_data; + ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; + bool cleanup = false; + size_t * cleanedup_rows_count = nullptr; using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 2; /// last, current. 
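The algorithm changes above also restore a validity check: during a merge, any `is_deleted` value other than 0 or 1 aborts with `INCORRECT_DATA`. A short sketch of the user-visible effect, assuming a build with this patch applied; the table name is illustrative:

```sql
-- is_deleted must be 0 or 1; anything else surfaces as an INCORRECT_DATA
-- error from ReplacingSortedAlgorithm when the parts are merged.
CREATE TABLE flag_check (key UInt64, version UInt64, is_deleted UInt8)
ENGINE = ReplacingMergeTree(version, is_deleted)
ORDER BY key;

INSERT INTO flag_check VALUES (1, 1, 2);  -- 2 is not a valid flag value

OPTIMIZE TABLE flag_check FINAL;  -- expected error: "Incorrect data: is_deleted = 2 (must be 1 or 0)."
```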
diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 8d25d153cb4..7e293db1aa8 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -14,21 +14,26 @@ public: ReplacingSortedTransform( const Block & header, size_t num_inputs, SortDescription description_, - const String & version_column, + const String & is_deleted_column, const String & version_column, size_t max_block_size_rows, size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, - bool use_average_block_sizes = false) + bool use_average_block_sizes = false, + bool cleanup = false, + size_t * cleanedup_rows_count = nullptr) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), + is_deleted_column, version_column, max_block_size_rows, max_block_size_bytes, out_row_sources_buf_, - use_average_block_sizes) + use_average_block_sizes, + cleanup, + cleanedup_rows_count) { } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index d47d17ccd26..28eb5e5f709 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1025,7 +1025,7 @@ static void addMergingFinal( case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, num_outputs, - sort_description, merging_params.version_column, max_block_size_rows, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false); + sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size_rows, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty()); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared(header, num_outputs, @@ -1099,7 +1099,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( /// can use parallel select on such parts. 
bool no_merging_final = settings.do_not_merge_across_partitions_select_final && std::distance(parts_to_merge_ranges[range_index], parts_to_merge_ranges[range_index + 1]) == 1 && - parts_to_merge_ranges[range_index]->data_part->info.level > 0; + parts_to_merge_ranges[range_index]->data_part->info.level > 0 && + data.merging_params.is_deleted_column.empty(); Pipes pipes; { RangesInDataParts new_parts; @@ -1842,6 +1843,8 @@ Pipe ReadFromMergeTree::spreadMarkRanges( } } + if (!data.merging_params.is_deleted_column.empty() && !names.contains(data.merging_params.is_deleted_column)) + column_names_to_read.push_back(data.merging_params.is_deleted_column); if (!data.merging_params.sign_column.empty() && !names.contains(data.merging_params.sign_column)) column_names_to_read.push_back(data.merging_params.sign_column); if (!data.merging_params.version_column.empty() && !names.contains(data.merging_params.version_column)) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 28f9ec6677a..1693d0e073b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -515,6 +515,7 @@ public: bool /*final*/, bool /*deduplicate*/, const Names & /* deduplicate_by_columns */, + bool /*cleanup*/, ContextPtr /*context*/) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method optimize is not supported by storage {}", getName()); diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9be31859a19..3d8bc62b5cc 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -312,6 +312,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() reserved_space, entry.deduplicate, entry.deduplicate_by_columns, + entry.cleanup, storage.merging_params, NO_TRANSACTION_PTR); diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index aed9f70d216..c218acce903 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -131,6 +131,7 @@ void MergePlainMergeTreeTask::prepare() merge_mutate_entry->tagger->reserved_space, deduplicate, deduplicate_by_columns, + cleanup, storage.merging_params, txn); } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 2c93f9c9e2c..5cc9c0e50d3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -20,6 +20,7 @@ public: StorageMetadataPtr metadata_snapshot_, bool deduplicate_, Names deduplicate_by_columns_, + bool cleanup_, MergeMutateSelectedEntryPtr merge_mutate_entry_, TableLockHolder table_lock_holder_, IExecutableTask::TaskResultCallback & task_result_callback_) @@ -27,6 +28,7 @@ public: , metadata_snapshot(std::move(metadata_snapshot_)) , deduplicate(deduplicate_) , deduplicate_by_columns(std::move(deduplicate_by_columns_)) + , cleanup(cleanup_) , merge_mutate_entry(std::move(merge_mutate_entry_)) , table_lock_holder(std::move(table_lock_holder_)) , task_result_callback(task_result_callback_) @@ -67,6 +69,7 @@ private: StorageMetadataPtr metadata_snapshot; bool deduplicate; Names deduplicate_by_columns; + bool cleanup; MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr}; TableLockHolder table_lock_holder; FutureMergedMutatedPartPtr future_part{nullptr}; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 8b5e9ba96ee..2269e4369d8 
100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -68,7 +68,10 @@ static void extractMergingAndGatheringColumns( /// Force version column for Replacing mode if (merging_params.mode == MergeTreeData::MergingParams::Replacing) + { + key_columns.emplace(merging_params.is_deleted_column); key_columns.emplace(merging_params.version_column); + } /// Force sign column for VersionedCollapsing mode. Version is already in primary key. if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) @@ -493,6 +496,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; size_t input_rows_filtered = *global_ctx->input_rows_filtered; + size_t cleanedup_rows_count = global_ctx->cleanedup_rows_count; global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); @@ -506,12 +510,12 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// skipped writing rows_sources file. Otherwise rows_sources_count must be equal to the total /// number of input rows. if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) - && sum_input_rows_exact != rows_sources_count + input_rows_filtered) + && sum_input_rows_exact != rows_sources_count + input_rows_filtered + cleanedup_rows_count) throw Exception( ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " + "Number of rows in source parts ({}) excluding filtered rows ({}) and cleaned up rows ({}) differs from number " "of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, rows_sources_count); + sum_input_rows_exact, input_rows_filtered, cleanedup_rows_count, rows_sources_count); /// TemporaryDataOnDisk::createRawStream returns WriteBufferFromFile implementing IReadableWriteBuffer /// and we expect to get ReadBufferFromFile here. 
@@ -755,6 +759,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->space_reservation, global_ctx->deduplicate, global_ctx->deduplicate_by_columns, + global_ctx->cleanup, projection_merging_params, global_ctx->need_prefix, global_ctx->new_data_part.get(), @@ -1003,8 +1008,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() case MergeTreeData::MergingParams::Replacing: merged_transform = std::make_shared( - header, pipes.size(), sort_description, ctx->merging_params.version_column, - merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size); + header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column, + merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size, + (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup, &global_ctx->cleanedup_rows_count); break; case MergeTreeData::MergingParams::Graphite: diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index aeede44fe88..8a96ceb8c40 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -67,6 +67,7 @@ public: ReservationSharedPtr space_reservation_, bool deduplicate_, Names deduplicate_by_columns_, + bool cleanup_, MergeTreeData::MergingParams merging_params_, bool need_prefix, IMergeTreeDataPart * parent_part_, @@ -90,6 +91,7 @@ public: global_ctx->space_reservation = std::move(space_reservation_); global_ctx->deduplicate = std::move(deduplicate_); global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_); + global_ctx->cleanup = std::move(cleanup_); global_ctx->parent_part = std::move(parent_part_); global_ctx->data = std::move(data_); global_ctx->mutator = std::move(mutator_); @@ -158,6 +160,8 @@ private: ReservationSharedPtr space_reservation{nullptr}; bool deduplicate{false}; Names deduplicate_by_columns{}; + bool cleanup{false}; + size_t cleanedup_rows_count{0}; NamesAndTypesList gathering_columns{}; NamesAndTypesList merging_columns{}; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 914affcc8f9..de34775548d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -846,6 +846,10 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat { const auto columns = metadata.getColumns().getAllPhysical(); + if (!is_deleted_column.empty() && mode != MergingParams::Replacing) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "is_deleted column for MergeTree cannot be specified in modes except Replacing."); + if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing) throw Exception(ErrorCodes::LOGICAL_ERROR, "Sign column for MergeTree cannot be specified " @@ -915,6 +919,41 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Version column {} does not exist in table declaration.", version_column); }; + /// Check that if the is_deleted column is needed, it exists and is of type UInt8. If exist, version column must be defined too but version checks are not done here. 
+ auto check_is_deleted_column = [this, & columns](bool is_optional, const std::string & storage) + { + if (is_deleted_column.empty()) + { + if (is_optional) + return; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: is_deleted ({}) column for storage {} is empty", is_deleted_column, storage); + } + else + { + if (version_column.empty() && !is_optional) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column ({}) for storage {} is empty while is_deleted ({}) is not.", + version_column, storage, is_deleted_column); + + bool miss_is_deleted_column = true; + for (const auto & column : columns) + { + if (column.name == is_deleted_column) + { + if (!typeid_cast(column.type.get())) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "is_deleted column ({}) for storage {} must have type UInt8. Provided column of type {}.", + is_deleted_column, storage, column.type->getName()); + miss_is_deleted_column = false; + break; + } + } + + if (miss_is_deleted_column) + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "is_deleted column {} does not exist in table declaration.", is_deleted_column); + } + }; + + if (mode == MergingParams::Collapsing) check_sign_column(false, "CollapsingMergeTree"); @@ -951,6 +990,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (mode == MergingParams::Replacing) { + check_is_deleted_column(true, "ReplacingMergeTree"); check_version_column(true, "ReplacingMergeTree"); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c69c7aaba3d..098fe296b24 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -349,6 +349,9 @@ public: /// For Collapsing and VersionedCollapsing mode. String sign_column; + /// For Replacing mode. Can be empty for Replacing. + String is_deleted_column; + /// For Summing mode. If empty - columns_to_sum is determined automatically. 
Names columns_to_sum; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 42f480ed18a..f78b383e173 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -676,6 +676,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( ReservationSharedPtr space_reservation, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, bool need_prefix, @@ -692,6 +693,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( space_reservation, deduplicate, deduplicate_by_columns, + cleanup, merging_params, need_prefix, parent_part, diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 5e8a89c94a4..6eab0ee0c37 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -165,6 +165,7 @@ public: ReservationSharedPtr space_reservation, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, const MergeTreeData::MergingParams & merging_params, const MergeTreeTransactionPtr & txn, bool need_prefix = true, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f63394a4d48..2a381afa805 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -325,7 +325,7 @@ Block MergeTreeDataWriter::mergeBlock( return nullptr; case MergeTreeData::MergingParams::Replacing: return std::make_shared( - block, 1, sort_description, merging_params.version_column, block_size + 1, /*block_size_bytes=*/0); + block, 1, sort_description, merging_params.is_deleted_column, merging_params.version_column, block_size + 1, /*block_size_bytes=*/0); case MergeTreeData::MergingParams::Collapsing: return std::make_shared( block, 1, sort_description, merging_params.sign_column, diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 106e66d8a99..ef41dfe2c98 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -74,6 +74,7 @@ struct Settings; M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ M(UInt64, number_of_free_entries_in_pool_to_execute_optimize_entire_partition, 25, "When there is less than specified number of free entries in pool, do not try to execute optimize entire partition with a merge (this merge is created when set min_age_to_force_merge_seconds > 0 and min_age_to_force_merge_on_partition_only = true). 
This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ + M(CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never, "Whether the ReplacingMergeTree cleanup of deleted rows has to be done automatically at each merge or only manually (possible values: 'Always'/'Never' (default))", 0) \ M(UInt64, replicated_max_mutations_in_one_entry, 10000, "Max number of mutation commands that can be merged together and executed in one MUTATE_PART entry (0 means unlimited)", 0) \ M(UInt64, number_of_mutations_to_delay, 500, "If table has at least that many unfinished mutations, artificially slow down mutations of table. Disabled if set to 0", 0) \ M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ @@ -232,7 +233,6 @@ struct Settings; MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_send_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_receive_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) \ - MAKE_OBSOLETE_MERGE_TREE_SETTING(M, String, clean_deleted_rows, "") \ /// Settings that should not change after the creation of a table. /// NOLINTNEXTLINE diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8c896edab14..e4070aa8262 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1057,6 +1057,7 @@ public: ctx->space_reservation, false, // TODO Do we need deduplicate for projections {}, + false, // no cleanup projection_merging_params, NO_TRANSACTION_PTR, /* need_prefix */ true, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index 85f99e3f8c3..9eb8b6ce24c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -96,6 +96,9 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const } } + if (cleanup) + out << "\ncleanup: " << cleanup; + break; case DROP_RANGE: @@ -269,6 +272,8 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor deduplicate_by_columns = std::move(new_deduplicate_by_columns); } + else if (checkString("cleanup: ", in)) + in >> cleanup; else trailing_newline_found = true; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 4821a80a29b..0ce59b18818 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -98,6 +98,7 @@ struct ReplicatedMergeTreeLogEntryData Strings source_parts; bool deduplicate = false; /// Do deduplicate on merge Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
+ bool cleanup = false; MergeType merge_type = MergeType::Regular; String column_name; String index_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index eec5454f9a7..41188891118 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -52,6 +52,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; + is_deleted_column = data.merging_params.is_deleted_column; columns_to_sum = fmt::format("{}", fmt::join(data.merging_params.columns_to_sum.begin(), data.merging_params.columns_to_sum.end(), ",")); version_column = data.merging_params.version_column; if (data.merging_params.mode == MergeTreeData::MergingParams::Graphite) @@ -156,6 +157,8 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const out << "merge parameters format version: " << merge_params_version << "\n"; if (!version_column.empty()) out << "version column: " << version_column << "\n"; + if (!is_deleted_column.empty()) + out << "is_deleted column: " << is_deleted_column << "\n"; if (!columns_to_sum.empty()) out << "columns to sum: " << columns_to_sum << "\n"; if (!graphite_params_hash.empty()) @@ -221,6 +224,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in) if (checkString("version column: ", in)) in >> version_column >> "\n"; + if (checkString("is_deleted column: ", in)) + in >> is_deleted_column >> "\n"; + if (checkString("columns to sum: ", in)) in >> columns_to_sum >> "\n"; @@ -273,6 +279,10 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in version column. " "Stored in ZooKeeper: {}, local: {}", from_zk.version_column, version_column); + if (is_deleted_column != from_zk.is_deleted_column) + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in is_deleted column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.is_deleted_column, is_deleted_column); + if (columns_to_sum != from_zk.columns_to_sum) throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sum columns. 
" "Stored in ZooKeeper: {}, local: {}", from_zk.columns_to_sum, columns_to_sum); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 67de9fd64ba..15ed8671f9b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -29,6 +29,7 @@ struct ReplicatedMergeTreeTableMetadata int merge_params_version = REPLICATED_MERGE_TREE_METADATA_WITH_ALL_MERGE_PARAMETERS; String sign_column; String version_column; + String is_deleted_column; String columns_to_sum; String graphite_params_hash; String primary_key; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9ed87e5c9ef..9a5af77d57c 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -138,7 +138,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) * CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign) * SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum]) * AggregatingMergeTree(date, [sample_key], primary_key, index_granularity) - * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column]) + * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column [, is_deleted_column]]) * GraphiteMergeTree(date, [sample_key], primary_key, index_granularity, 'config_element') * * Alternatively, you can specify: @@ -441,6 +441,15 @@ static StoragePtr create(const StorageFactory::Arguments & args) } else if (merging_params.mode == MergeTreeData::MergingParams::Replacing) { + // if there is args and number of optional parameter is higher than 1 + // is_deleted is not allowed with the 'allow_deprecated_syntax_for_merge_tree' settings + if (arg_cnt - arg_num == 2 && !engine_args[arg_cnt - 1]->as() && is_extended_storage_def) + { + if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.is_deleted_column)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "is_deleted column name must be an identifier {}", verbose_help_message); + --arg_cnt; + } + /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column. 
if (arg_cnt && !engine_args[arg_cnt - 1]->as()) { diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index c9843211e08..4ead714c740 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -321,6 +321,7 @@ bool StorageEmbeddedRocksDB::optimize( bool final, bool deduplicate, const Names & /* deduplicate_by_columns */, + bool cleanup, ContextPtr /*context*/) { if (partition) @@ -332,6 +333,9 @@ bool StorageEmbeddedRocksDB::optimize( if (deduplicate) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type EmbeddedRocksDB"); + if (cleanup) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type EmbeddedRocksDB"); + std::shared_lock lock(rocksdb_ptr_mx); rocksdb::CompactRangeOptions compact_options; auto status = rocksdb_ptr->CompactRange(compact_options, nullptr, nullptr); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 733baebb601..b59fe72ef47 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -65,6 +65,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr context) override; bool supportsParallelInsert() const override { return true; } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 2646a7753e4..f3030dadc59 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -694,7 +694,7 @@ void StorageBuffer::flushAndPrepareForShutdown() try { - optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, getContext()); + optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, getContext()); } catch (...) { @@ -720,6 +720,7 @@ bool StorageBuffer::optimize( bool final, bool deduplicate, const Names & /* deduplicate_by_columns */, + bool cleanup, ContextPtr /*context*/) { if (partition) @@ -731,6 +732,9 @@ bool StorageBuffer::optimize( if (deduplicate) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type Buffer"); + if (cleanup) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type Buffer"); + flushAllBuffers(false); return true; } @@ -1063,7 +1067,7 @@ void StorageBuffer::alter(const AlterCommands & params, ContextPtr local_context auto metadata_snapshot = getInMemoryMetadataPtr(); /// Flush buffers to the storage because BufferSource skips buffers with old metadata_version. 
- optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, local_context); + optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, local_context); StorageInMemoryMetadata new_metadata = *metadata_snapshot; params.apply(new_metadata, local_context); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 21eb86019fc..94873ea04ce 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -100,6 +100,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr context) override; bool supportsSampling() const override { return true; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 2339fd11cf8..0d958d20f49 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -262,12 +262,13 @@ bool StorageMaterializedView::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr local_context) { checkStatementCanBeForwarded(); auto storage_ptr = getTargetTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); + return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); } void StorageMaterializedView::alter( diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 9ddcc458f3e..55843197d50 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -54,6 +54,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr context) override; void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9378aaa1f6a..3f4466699ea 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1095,6 +1095,7 @@ bool StorageMergeTree::merge( bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, const MergeTreeTransactionPtr & txn, String & out_disable_reason, bool optimize_skip_merged_partitions) @@ -1134,7 +1135,7 @@ bool StorageMergeTree::merge( /// Copying a vector of columns `deduplicate by columns. 
IExecutableTask::TaskResultCallback f = [](bool) {}; auto task = std::make_shared( - *this, metadata_snapshot, deduplicate, deduplicate_by_columns, merge_mutate_entry, table_lock_holder, f); + *this, metadata_snapshot, deduplicate, deduplicate_by_columns, cleanup, merge_mutate_entry, table_lock_holder, f); task->setCurrentTransaction(MergeTreeTransactionHolder{}, MergeTreeTransactionPtr{txn}); @@ -1372,7 +1373,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merge_entry) { - auto task = std::make_shared(*this, metadata_snapshot, /* deduplicate */ false, Names{}, merge_entry, shared_lock, common_assignee_trigger); + auto task = std::make_shared(*this, metadata_snapshot, /* deduplicate */ false, Names{}, /* cleanup */ false, merge_entry, shared_lock, common_assignee_trigger); task->setCurrentTransaction(std::move(transaction_for_merge), std::move(txn)); bool scheduled = assignee.scheduleMergeMutateTask(task); /// The problem that we already booked a slot for TTL merge, but a merge list entry will be created only in a prepare method @@ -1506,6 +1507,7 @@ bool StorageMergeTree::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr local_context) { if (deduplicate) @@ -1521,6 +1523,13 @@ bool StorageMergeTree::optimize( String disable_reason; if (!partition && final) { + if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) + { + constexpr const char * message = "Cannot OPTIMIZE table with CLEANUP: {}"; + disable_reason = "CLEANUP is supported only for ReplacingMergeTree"; + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + } + DataPartsVector data_parts = getVisibleDataPartsVector(local_context); std::unordered_set partition_ids; @@ -1535,6 +1544,7 @@ bool StorageMergeTree::optimize( true, deduplicate, deduplicate_by_columns, + cleanup, txn, disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) @@ -1562,6 +1572,7 @@ bool StorageMergeTree::optimize( final, deduplicate, deduplicate_by_columns, + cleanup, txn, disable_reason, local_context->getSettingsRef().optimize_skip_merged_partitions)) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 89de60ed819..a0d0a2519a0 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -83,6 +83,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr context) override; void mutate(const MutationCommands & commands, ContextPtr context) override; @@ -171,13 +172,14 @@ private: * Returns true if merge is finished successfully.
*/ bool merge( - bool aggressive, - const String & partition_id, - bool final, bool deduplicate, - const Names & deduplicate_by_columns, - const MergeTreeTransactionPtr & txn, - String & out_disable_reason, - bool optimize_skip_merged_partitions = false); + bool aggressive, + const String & partition_id, + bool final, bool deduplicate, + const Names & deduplicate_by_columns, + bool cleanup, + const MergeTreeTransactionPtr & txn, + String & out_disable_reason, + bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 17f1b2a6d97..5d57f75a620 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -121,15 +121,16 @@ public: } bool optimize( - const ASTPtr & query, - const StorageMetadataPtr & metadata_snapshot, - const ASTPtr & partition, - bool final, - bool deduplicate, - const Names & deduplicate_by_columns, - ContextPtr context) override + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Names & deduplicate_by_columns, + bool cleanup, + ContextPtr context) override { - return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context); + return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, context); } void mutate(const MutationCommands & commands, ContextPtr context) override { getNested()->mutate(commands, context); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0089eeada00..beccfe88d0e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3643,6 +3643,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() const auto storage_settings_ptr = getSettings(); const bool deduplicate = false; /// TODO: read deduplicate option from table config const Names deduplicate_by_columns = {}; + const bool cleanup = (storage_settings_ptr->clean_deleted_rows != CleanDeletedRows::Never); CreateMergeEntryResult create_result = CreateMergeEntryResult::Other; enum class AttemptStatus @@ -3726,10 +3727,12 @@ void StorageReplicatedMergeTree::mergeSelectingTask() future_merged_part->part_format, deduplicate, deduplicate_by_columns, + cleanup, nullptr, merge_pred->getVersion(), future_merged_part->merge_type); + if (create_result == CreateMergeEntryResult::Ok) return AttemptStatus::EntryCreated; if (create_result == CreateMergeEntryResult::LogUpdated) @@ -3846,6 +3849,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ReplicatedMergeTreeLogEntryData * out_log_entry, int32_t log_version, MergeType merge_type) @@ -3885,6 +3889,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c entry.merge_type = merge_type; entry.deduplicate = deduplicate; entry.deduplicate_by_columns = deduplicate_by_columns; + entry.cleanup = cleanup; entry.create_time = time(nullptr); for (const auto & part : parts) @@ -5619,6 +5624,7 @@ bool StorageReplicatedMergeTree::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr query_context) { /// NOTE: exclusive lock cannot be used here, since this 
may lead to deadlock (see comments below), @@ -5630,6 +5636,9 @@ bool StorageReplicatedMergeTree::optimize( if (!is_leader) throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader"); + if (cleanup) + LOG_DEBUG(log, "Cleaning up the ReplicatedMergeTree."); + auto handle_noop = [&](FormatStringHelper fmt_string, Args && ...args) { PreformattedMessage message = fmt_string.format(std::forward(args)...); @@ -5708,6 +5717,7 @@ bool StorageReplicatedMergeTree::optimize( future_merged_part->uuid, future_merged_part->part_format, deduplicate, deduplicate_by_columns, + cleanup, &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); @@ -5732,6 +5742,13 @@ bool StorageReplicatedMergeTree::optimize( bool assigned = false; if (!partition && final) { + if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) + { + constexpr const char * message = "Cannot OPTIMIZE table with CLEANUP: {}"; + String disable_reason = "CLEANUP is supported only for ReplacingMergeTree"; + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + } + DataPartsVector data_parts = getVisibleDataPartsVector(query_context); std::unordered_set partition_ids; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 94ddaa753a5..3ffadf4568c 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -178,6 +178,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr query_context) override; void alter(const AlterCommands & commands, ContextPtr query_context, AlterLockHolder & table_lock_holder) override; @@ -747,6 +748,7 @@ private: const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ReplicatedMergeTreeLogEntryData * out_log_entry, int32_t log_version, MergeType merge_type); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 3eff3f9f995..46c38ffa129 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -435,11 +435,12 @@ bool StorageWindowView::optimize( bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr local_context) { auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); + return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); } void StorageWindowView::alter( diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index d2484ae8ebf..de8f880c602 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -134,6 +134,7 @@ public: bool final, bool deduplicate, const Names & deduplicate_by_columns, + bool cleanup, ContextPtr context) override; void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference index a4d91178d73..6bac6173183 100644 ---
a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference +++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.reference @@ -3,7 +3,5 @@ 2018-01-01 2 2 2018-01-01 2 2 == (Replicas) Test optimize == -d1 2 1 d2 1 0 -d3 2 1 d4 1 0 diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql index 9e293d0f7e2..e3c1bb10426 100644 --- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql +++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql @@ -7,14 +7,14 @@ create table tab_00577 (date Date, version UInt64, val UInt64) engine = Replacin insert into tab_00577 values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1); insert into tab_00577 values ('2018-01-01', 0, 0); select * from tab_00577 order by version; -OPTIMIZE TABLE tab_00577 FINAL; +OPTIMIZE TABLE tab_00577 FINAL CLEANUP; select * from tab_00577; drop table tab_00577; DROP TABLE IF EXISTS testCleanupR1; CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) - ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version) + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) ORDER BY uid SETTINGS enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0; INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); @@ -22,9 +22,9 @@ INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1); INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1); SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" -OPTIMIZE TABLE testCleanupR1 FINAL; +OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP; -- Only d3 to d5 remain SELECT '== (Replicas) Test optimize =='; SELECT * FROM testCleanupR1 order by uid; -DROP TABLE IF EXISTS testCleanupR1 +DROP TABLE IF EXISTS testCleanupR1 \ No newline at end of file diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference new file mode 100644 index 00000000000..04a2b75bb4f --- /dev/null +++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference @@ -0,0 +1,99 @@ +== Test SELECT ... FINAL - no is_deleted == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +== Test SELECT ... 
FINAL == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +== Insert backups == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +== Insert a second batch with overlapping data == +d1 5 0 +d2 3 0 +d3 3 0 +d4 3 0 +d5 1 0 +== Only last version remains after OPTIMIZE W/ CLEANUP == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 +d6 3 0 +== OPTIMIZE W/ CLEANUP (remove d6) == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 +== Test of the SETTINGS clean_deleted_rows as Always == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +== Test of the SETTINGS clean_deleted_rows as Never == +d1 5 0 +d2 1 0 +d3 1 0 +d4 3 0 +d5 1 0 +d6 2 1 +== (Replicas) Test optimize == +d2 1 0 +d4 1 0 +== (Replicas) Test settings == +c2 1 0 +c4 1 0 +== Check cleanup & settings for other merge trees == +d1 1 1 +d1 1 1 +d1 1 1 +d1 1 1 1 +d1 1 1 1 diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql new file mode 100644 index 00000000000..8549300d49f --- /dev/null +++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql @@ -0,0 +1,160 @@ +-- Tags: zookeeper + +-- The allow_deprecated_syntax_for_merge_tree setting prevents enabling the is_deleted column +set allow_deprecated_syntax_for_merge_tree=0; + +-- Test the behaviour without the is_deleted column +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid); +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +SELECT '== Test SELECT ... FINAL - no is_deleted =='; +select * from test FINAL order by uid; +OPTIMIZE TABLE test FINAL CLEANUP; +select * from test order by uid; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +SELECT '== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always =='; +select * from test FINAL order by uid; +OPTIMIZE TABLE test FINAL CLEANUP; +select * from test order by uid; + +-- Test the new behaviour +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +SELECT '== Test SELECT ...
FINAL =='; +select * from test FINAL order by uid; +select * from test order by uid; + +SELECT '== Insert backups =='; +INSERT INTO test (*) VALUES ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1); +select * from test FINAL order by uid; + +SELECT '== Insert a second batch with overlapping data =='; +INSERT INTO test (*) VALUES ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 1), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0), ('d2', 2, 1), ('d2', 3, 0), ('d3', 2, 1), ('d3', 3, 0); +select * from test FINAL order by uid; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); + +-- Expect d6 to be version=3 is_deleted=false +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); +-- Insert previous version of 'd6' but only v=3 is_deleted=false will remain +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); +SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; +OPTIMIZE TABLE test FINAL CLEANUP; +select * from test order by uid; + +-- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one taken into account) +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); + +SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; +OPTIMIZE TABLE test FINAL CLEANUP; +-- No d6 anymore +select * from test order by uid; + +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always'; + +SELECT '== Test of the SETTINGS clean_deleted_rows as Always =='; +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +-- Even if the setting is set to Always, the SELECT FINAL doesn't delete rows +select * from test FINAL order by uid; +select * from test order by uid; + +OPTIMIZE TABLE test FINAL; +-- d6 has to be removed since we set clean_deleted_rows to 'Always' +select * from test order by uid; + +SELECT '== Test of the SETTINGS clean_deleted_rows as Never =='; +ALTER TABLE test MODIFY SETTING clean_deleted_rows='Never'; +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +OPTIMIZE TABLE test FINAL; +-- d6 must NOT be removed since we set clean_deleted_rows to 'Never' +select * from test order by uid; + +DROP TABLE IF EXISTS testCleanupR1; + +CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) + ORDER BY uid; + + +INSERT INTO
testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); +INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1); +INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1); +SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" + +OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP; + +-- Only d2 and d4 remain +SELECT '== (Replicas) Test optimize =='; +SELECT * FROM testCleanupR1 order by uid; + +------------------------------ + +DROP TABLE IF EXISTS testSettingsR1; + +CREATE TABLE testSettingsR1 (col1 String, version UInt32, is_deleted UInt8) + ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_setting/', 'r1', version, is_deleted) + ORDER BY col1 + SETTINGS clean_deleted_rows = 'Always'; + +INSERT INTO testSettingsR1 (*) VALUES ('c1', 1, 1),('c2', 1, 0),('c3', 1, 1),('c4', 1, 0); +SYSTEM SYNC REPLICA testSettingsR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" + +OPTIMIZE TABLE testSettingsR1 FINAL; + +-- Only c2 and c4 remain +SELECT '== (Replicas) Test settings =='; +SELECT * FROM testSettingsR1 order by col1; + + +------------------------------ +-- Check errors +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); + +-- is_deleted == 0/1 +INSERT INTO test (*) VALUES ('d1', 1, 2); -- { serverError INCORRECT_DATA } + +DROP TABLE IF EXISTS test; +-- check is_deleted type +CREATE TABLE test (uid String, version UInt32, is_deleted String) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); -- { serverError BAD_TYPE_OF_FIELD } + +-- is_deleted column for other MergeTrees - ErrorCodes::LOGICAL_ERROR + +-- Check clean_deleted_rows='Always' for other MergeTrees +SELECT '== Check cleanup & settings for other merge trees =='; +CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = MergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO testMT (*) VALUES ('d1', 1, 1); +OPTIMIZE TABLE testMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } +OPTIMIZE TABLE testMT FINAL; +SELECT * FROM testMT order by uid; + +CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO testSummingMT (*) VALUES ('d1', 1, 1); +OPTIMIZE TABLE testSummingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } +OPTIMIZE TABLE testSummingMT FINAL; +SELECT * FROM testSummingMT order by uid; + +CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO testAggregatingMT (*) VALUES ('d1', 1, 1); +OPTIMIZE TABLE testAggregatingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } +OPTIMIZE TABLE testAggregatingMT FINAL; +SELECT * FROM testAggregatingMT order by uid; + +CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO testCollapsingMT (*) VALUES ('d1', 1, 1, 1); +OPTIMIZE TABLE testCollapsingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } +OPTIMIZE TABLE testCollapsingMT FINAL; +SELECT * FROM testCollapsingMT order by uid; + +CREATE TABLE testVersionedCMT (uid String, version UInt32,
is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always'; +INSERT INTO testVersionedCMT (*) VALUES ('d1', 1, 1, 1); +OPTIMIZE TABLE testVersionedCMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } +OPTIMIZE TABLE testVersionedCMT FINAL; +SELECT * FROM testVersionedCMT order by uid; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference new file mode 100644 index 00000000000..d19222b55ec --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.reference @@ -0,0 +1,31 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles _is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. +-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; +SELECT count() FROM t; +1000 +SELECT count() FROM t FINAL; +900 +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +900 +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; +900 +DROP TABLE t; diff --git a/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql new file mode 100644 index 00000000000..a89a1ff590a --- /dev/null +++ b/tests/queries/0_stateless/02814_ReplacingMergeTree_fix_select_final_on_single_partition.sql @@ -0,0 +1,32 @@ +--- Based on https://github.com/ClickHouse/ClickHouse/issues/49685 +--- Verify that ReplacingMergeTree properly handles _is_deleted: +--- SELECT FINAL should take `_is_deleted` into consideration when there is only one partition. +-- { echoOn } + +DROP TABLE IF EXISTS t; +CREATE TABLE t +( + `account_id` UInt64, + `_is_deleted` UInt8, + `_version` UInt64 +) +ENGINE = ReplacingMergeTree(_version, _is_deleted) +ORDER BY (account_id); + +INSERT INTO t SELECT number, 0, 1 FROM numbers(1e3); +-- Mark the first 100 rows as deleted. +INSERT INTO t SELECT number, 1, 1 FROM numbers(1e2); + +-- Put everything in one partition +OPTIMIZE TABLE t FINAL; + +SELECT count() FROM t; +SELECT count() FROM t FINAL; + +-- Both should produce the same number of rows. +-- Previously, `do_not_merge_across_partitions_select_final = 1` showed more rows, +-- as if no rows were deleted. 
+SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 1; +SELECT count() FROM t FINAL SETTINGS do_not_merge_across_partitions_select_final = 0; + +DROP TABLE t; diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference new file mode 100644 index 00000000000..9c9caa22139 --- /dev/null +++ b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.reference @@ -0,0 +1,13 @@ +== Only last version remains after OPTIMIZE W/ CLEANUP == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 +d6 3 0 +== OPTIMIZE W/ CLEANUP (remove d6) == +d1 5 0 +d2 1 0 +d3 1 0 +d4 1 0 +d5 1 0 diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql new file mode 100644 index 00000000000..7b78e2900e7 --- /dev/null +++ b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS vertical_merge_algorithm_min_rows_to_activate = 1, + vertical_merge_algorithm_min_columns_to_activate = 0, + min_rows_for_wide_part = 1, + min_bytes_for_wide_part = 1; + +-- Expect d6 to be version=3 is_deleted=false +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); +-- Insert previous version of 'd6' but only v=3 is_deleted=false will remain +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1); +SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP =='; +OPTIMIZE TABLE test FINAL CLEANUP; +select * from test order by uid; + +-- insert d6 v=3 is_deleted=true (timestamp more recent so this version should be the one taken into account) +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1); + +SELECT '== OPTIMIZE W/ CLEANUP (remove d6) =='; +OPTIMIZE TABLE test FINAL CLEANUP; +-- No d6 anymore +select * from test order by uid; + +DROP TABLE IF EXISTS test; diff --git a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql index c832e16e81e..3c1bec4fb3f 100644 --- a/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql +++ b/tests/queries/0_stateless/02910_replicated_merge_parameters_must_consistent.sql @@ -17,6 +17,26 @@ CREATE TABLE t_r ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t/', 'r2') ORDER BY id; -- { serverError METADATA_MISMATCH } +CREATE TABLE t2 +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64, + `deleted` UInt8 +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r1', legacy_ver) +ORDER BY id; + +CREATE TABLE t2_r +( + `id` UInt64, + `val` String, + `legacy_ver` UInt64, + `deleted` UInt8 +) +ENGINE = ReplicatedReplacingMergeTree('/tables/{database}/t2/', 'r2', legacy_ver, deleted) +ORDER BY id; -- { serverError METADATA_MISMATCH } + CREATE TABLE t3 ( `key` UInt64, From b9233f6d4fa3ab74bb52dfae3210b7e33f1e3984 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin
Date: Sat, 23 Dec 2023 14:46:21 +0100 Subject: [PATCH 25/88] Move Allocator code into module part This should reduce the amount of code that has to be recompiled on Exception.h changes (and everything else that had been included there). This will actually not help a lot, because it is also included into PODArray.h and ThreadPool.h at least... Sigh. Signed-off-by: Azat Khuzhin --- .../AggregateFunctionGroupArray.cpp | 1 + .../examples/quantile-t-digest.cpp | 1 + src/Columns/ColumnCompressed.cpp | 1 + src/Columns/IColumnDummy.cpp | 1 + src/Columns/tests/gtest_column_vector.cpp | 1 + src/Common/Allocator.cpp | 183 +++++++++++++++++- src/Common/Allocator.h | 173 +---------------- src/Common/Arena.h | 1 + src/Common/ArenaWithFreeLists.h | 1 + src/Common/AsynchronousMetrics.cpp | 2 + src/Common/FiberStack.h | 5 + src/Common/OpenTelemetryTraceContext.cpp | 1 + src/Common/ZooKeeper/ZooKeeperImpl.cpp | 3 +- src/Common/examples/shell_command_inout.cpp | 1 + .../fetchPostgreSQLTableStructure.cpp | 1 - src/Disks/DiskLocal.cpp | 1 + .../ObjectStorages/DiskObjectStorage.cpp | 1 + src/Functions/formatReadableDecimalSize.cpp | 1 + src/Functions/formatReadableQuantity.cpp | 1 + src/Functions/formatReadableSize.cpp | 1 + src/Functions/randDistribution.cpp | 3 +- src/IO/MMapReadBufferFromFileWithCache.cpp | 1 + src/IO/ReadHelpers.h | 1 + .../Cache/WriteBufferToFileSegment.cpp | 1 + src/Interpreters/GraceHashJoin.cpp | 1 + src/Interpreters/HashJoin.cpp | 1 + src/Interpreters/loadMetadata.cpp | 1 + .../Formats/Impl/MySQLOutputFormat.cpp | 1 + src/Processors/Formats/Impl/Parquet/Write.cpp | 1 + .../Merges/MergingSortedTransform.cpp | 1 + .../AggregatingInOrderTransform.cpp | 1 + .../Transforms/AggregatingTransform.cpp | 1 + .../MergeTree/DataPartStorageOnDiskBase.cpp | 1 + src/Storages/StoragePostgreSQL.cpp | 1 + src/Storages/StorageS3Settings.cpp | 1 + src/Storages/StorageURL.cpp | 3 +- 36 files changed, 227 insertions(+), 174 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index b95471df90a..6c6397e35d5 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -20,6 +20,7 @@ #include #include +#include #include diff --git a/src/AggregateFunctions/examples/quantile-t-digest.cpp b/src/AggregateFunctions/examples/quantile-t-digest.cpp index b4e58e6203c..5360304b311 100644 --- a/src/AggregateFunctions/examples/quantile-t-digest.cpp +++ b/src/AggregateFunctions/examples/quantile-t-digest.cpp @@ -1,6 +1,7 @@ #include #include #include +#include int main(int, char **) { diff --git a/src/Columns/ColumnCompressed.cpp b/src/Columns/ColumnCompressed.cpp index 9fb7b108501..3bdc514d6d8 100644 --- a/src/Columns/ColumnCompressed.cpp +++ b/src/Columns/ColumnCompressed.cpp @@ -1,4 +1,5 @@ #include +#include #pragma clang diagnostic ignored "-Wold-style-cast" diff --git a/src/Columns/IColumnDummy.cpp b/src/Columns/IColumnDummy.cpp index 42b66e1156c..01091a87049 100644 --- a/src/Columns/IColumnDummy.cpp +++ b/src/Columns/IColumnDummy.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp index 14bf36434b6..b71d4a095ab 100644 --- a/src/Columns/tests/gtest_column_vector.cpp +++ b/src/Columns/tests/gtest_column_vector.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include using namespace DB; diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index
2e00b157621..c4137920395 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -1,9 +1,190 @@ -#include "Allocator.h" +#include +#include +#include +#include +#include + +#include +#include + +#include +#include /// MADV_POPULATE_WRITE + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; + extern const int LOGICAL_ERROR; +} + +} + +namespace +{ + +using namespace DB; + +#if defined(MADV_POPULATE_WRITE) +/// Address passed to madvise is required to be aligned to the page boundary. +auto adjustToPageSize(void * buf, size_t len, size_t page_size) +{ + const uintptr_t address_numeric = reinterpret_cast(buf); + const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size; + return std::make_pair(reinterpret_cast(next_page_start), len - (next_page_start - address_numeric)); +} +#endif + +void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_) +{ +#if defined(MADV_POPULATE_WRITE) + if (len_ < POPULATE_THRESHOLD) + return; + + static const size_t page_size = ::getPageSize(); + if (len_ < page_size) /// Rounded address should be still within [buf, buf + len). + return; + + auto [buf, len] = adjustToPageSize(buf_, len_, page_size); + if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0) + LOG_TRACE( + LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1), + "Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)", + errnoToString(res)); +#endif +} + +template +void * allocNoTrack(size_t size, size_t alignment) +{ + void * buf; + if (alignment <= MALLOC_MIN_ALIGNMENT) + { + if constexpr (clear_memory) + buf = ::calloc(size, 1); + else + buf = ::malloc(size); + + if (nullptr == buf) + throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot malloc {}.", ReadableSize(size)); + } + else + { + buf = nullptr; + int res = posix_memalign(&buf, alignment, size); + + if (0 != res) + throw DB::ErrnoException( + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)); + + if constexpr (clear_memory) + memset(buf, 0, size); + } + + if constexpr (populate) + prefaultPages(buf, size); + + return buf; +} + +void freeNoTrack(void * buf) +{ + ::free(buf); +} + +void checkSize(size_t size) +{ + /// More obvious exception in case of possible overflow (instead of just "Cannot mmap"). + if (size >= 0x8000000000000000ULL) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size); +} + +} /// Constant is chosen almost arbitrarily, what I observed is 128KB is too small, 1MB is almost indistinguishable from 64MB and 1GB is too large. extern const size_t POPULATE_THRESHOLD = 16 * 1024 * 1024; +template +void * Allocator::alloc(size_t size, size_t alignment) +{ + checkSize(size); + auto trace = CurrentMemoryTracker::alloc(size); + void * ptr = allocNoTrack(size, alignment); + trace.onAlloc(ptr, size); + return ptr; +} + + +template +void Allocator::free(void * buf, size_t size) +{ + try + { + checkSize(size); + freeNoTrack(buf); + auto trace = CurrentMemoryTracker::free(size); + trace.onFree(buf, size); + } + catch (...) + { + DB::tryLogCurrentException("Allocator::free"); + throw; + } +} + +template +void * Allocator::realloc(void * buf, size_t old_size, size_t new_size, size_t alignment) +{ + checkSize(new_size); + + if (old_size == new_size) + { + /// nothing to do. + /// BTW, it's not possible to change alignment while doing realloc. 
+ } + else if (alignment <= MALLOC_MIN_ALIGNMENT) + { + /// Resize malloc'd memory region with no special alignment requirement. + auto trace_free = CurrentMemoryTracker::free(old_size); + auto trace_alloc = CurrentMemoryTracker::alloc(new_size); + trace_free.onFree(buf, old_size); + + void * new_buf = ::realloc(buf, new_size); + if (nullptr == new_buf) + { + throw DB::ErrnoException( + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, + "Allocator: Cannot realloc from {} to {}", + ReadableSize(old_size), + ReadableSize(new_size)); + } + + buf = new_buf; + trace_alloc.onAlloc(buf, new_size); + + if constexpr (clear_memory) + if (new_size > old_size) + memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); + } + else + { + /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. + void * new_buf = alloc(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + free(buf, old_size); + buf = new_buf; + } + + if constexpr (populate) + prefaultPages(buf, new_size); + + return buf; +} + + template class Allocator; template class Allocator; template class Allocator; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 269e23f3719..b865dacc2e9 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -8,47 +8,19 @@ #define ALLOCATOR_ASLR 1 #endif -#include -#include - #if !defined(OS_DARWIN) && !defined(OS_FREEBSD) #include #endif -#include -#include -#include - #include -#include - -#include -#include -#include -#include - #include - -#include -#include -#include +#include extern const size_t POPULATE_THRESHOLD; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_ALLOCATE_MEMORY; - extern const int LOGICAL_ERROR; -} - -} - /** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+). * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs. * Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb. @@ -69,83 +41,16 @@ class Allocator { public: /// Allocate memory range. - void * alloc(size_t size, size_t alignment = 0) - { - checkSize(size); - auto trace = CurrentMemoryTracker::alloc(size); - void * ptr = allocNoTrack(size, alignment); - trace.onAlloc(ptr, size); - return ptr; - } + void * alloc(size_t size, size_t alignment = 0); /// Free memory range. - void free(void * buf, size_t size) - { - try - { - checkSize(size); - freeNoTrack(buf); - auto trace = CurrentMemoryTracker::free(size); - trace.onFree(buf, size); - } - catch (...) - { - DB::tryLogCurrentException("Allocator::free"); - throw; - } - } + void free(void * buf, size_t size); /** Enlarge memory range. * Data from old range is moved to the beginning of new range. * Address of memory range could change. */ - void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) - { - checkSize(new_size); - - if (old_size == new_size) - { - /// nothing to do. - /// BTW, it's not possible to change alignment while doing realloc. - } - else if (alignment <= MALLOC_MIN_ALIGNMENT) - { - /// Resize malloc'd memory region with no special alignment requirement. 
- auto trace_free = CurrentMemoryTracker::free(old_size); - auto trace_alloc = CurrentMemoryTracker::alloc(new_size); - trace_free.onFree(buf, old_size); - - void * new_buf = ::realloc(buf, new_size); - if (nullptr == new_buf) - { - throw DB::ErrnoException( - DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, - "Allocator: Cannot realloc from {} to {}", - ReadableSize(old_size), - ReadableSize(new_size)); - } - - buf = new_buf; - trace_alloc.onAlloc(buf, new_size); - - if constexpr (clear_memory) - if (new_size > old_size) - memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); - } - else - { - /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. - void * new_buf = alloc(new_size, alignment); - memcpy(new_buf, buf, std::min(old_size, new_size)); - free(buf, old_size); - buf = new_buf; - } - - if constexpr (populate) - prefaultPages(buf, new_size); - - return buf; - } + void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0); protected: static constexpr size_t getStackThreshold() @@ -156,76 +61,6 @@ protected: static constexpr bool clear_memory = clear_memory_; private: - void * allocNoTrack(size_t size, size_t alignment) - { - void * buf; - if (alignment <= MALLOC_MIN_ALIGNMENT) - { - if constexpr (clear_memory) - buf = ::calloc(size, 1); - else - buf = ::malloc(size); - - if (nullptr == buf) - throw DB::ErrnoException(DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Allocator: Cannot malloc {}.", ReadableSize(size)); - } - else - { - buf = nullptr; - int res = posix_memalign(&buf, alignment, size); - - if (0 != res) - throw DB::ErrnoException( - DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)); - - if constexpr (clear_memory) - memset(buf, 0, size); - } - - if constexpr (populate) - prefaultPages(buf, size); - - return buf; - } - - void freeNoTrack(void * buf) - { - ::free(buf); - } - - void checkSize(size_t size) - { - /// More obvious exception in case of possible overflow (instead of just "Cannot mmap"). - if (size >= 0x8000000000000000ULL) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size); - } - - /// Address passed to madvise is required to be aligned to the page boundary. - auto adjustToPageSize(void * buf, size_t len, size_t page_size) - { - const uintptr_t address_numeric = reinterpret_cast(buf); - const size_t next_page_start = ((address_numeric + page_size - 1) / page_size) * page_size; - return std::make_pair(reinterpret_cast(next_page_start), len - (next_page_start - address_numeric)); - } - - void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_) - { -#if defined(MADV_POPULATE_WRITE) - if (len_ < POPULATE_THRESHOLD) - return; - - static const size_t page_size = ::getPageSize(); - if (len_ < page_size) /// Rounded address should be still within [buf, buf + len). 
- return; - - auto [buf, len] = adjustToPageSize(buf_, len_, page_size); - if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0) - LOG_TRACE( - LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1), - "Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)", - errnoToString(res)); -#endif - } }; diff --git a/src/Common/Arena.h b/src/Common/Arena.h index 7604091442e..917bef0d6e8 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -8,6 +8,7 @@ #include #include #include +#include #if __has_include() && defined(ADDRESS_SANITIZER) # include diff --git a/src/Common/ArenaWithFreeLists.h b/src/Common/ArenaWithFreeLists.h index 76760a20320..80b4a00241d 100644 --- a/src/Common/ArenaWithFreeLists.h +++ b/src/Common/ArenaWithFreeLists.h @@ -1,5 +1,6 @@ #pragma once +#include #include #if __has_include() && defined(ADDRESS_SANITIZER) # include diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index e8deb459b24..31cf1962251 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -8,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 067b0aa7a63..9d135f27306 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -13,6 +13,11 @@ #include #endif +/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + namespace DB::ErrorCodes { extern const int CANNOT_ALLOCATE_MEMORY; diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index ab1a430cebb..92803af93a9 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 9ec7208d3eb..d732b900d37 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1,4 +1,5 @@ -#include "Common/ZooKeeper/ZooKeeperConstants.h" +#include +#include #include #include diff --git a/src/Common/examples/shell_command_inout.cpp b/src/Common/examples/shell_command_inout.cpp index 615700cd042..a646dfba311 100644 --- a/src/Common/examples/shell_command_inout.cpp +++ b/src/Common/examples/shell_command_inout.cpp @@ -6,6 +6,7 @@ #include #include #include +#include /** This example shows how we can proxy stdin to ShellCommand and obtain stdout in streaming fashion. 
*/ diff --git a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp index eb7f72b61aa..469ca52890a 100644 --- a/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp +++ b/src/Databases/PostgreSQL/fetchPostgreSQLTableStructure.cpp @@ -207,7 +207,6 @@ PostgreSQLTableStructure::ColumnsInfoPtr readNamesAndTypesList( columns.push_back(NameAndTypePair(column_name, data_type)); auto attgenerated = std::get<6>(row); - LOG_TEST(&Poco::Logger::get("kssenii"), "KSSENII: attgenerated: {}", attgenerated); attributes.emplace( column_name, diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 8e21b95ce68..5e77ff61789 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 9f4b59a6443..c3baf3fdbda 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Functions/formatReadableDecimalSize.cpp b/src/Functions/formatReadableDecimalSize.cpp index b6fd0de8f7b..1aa5abc526e 100644 --- a/src/Functions/formatReadableDecimalSize.cpp +++ b/src/Functions/formatReadableDecimalSize.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/src/Functions/formatReadableQuantity.cpp b/src/Functions/formatReadableQuantity.cpp index 682fac88969..483e8a77a0b 100644 --- a/src/Functions/formatReadableQuantity.cpp +++ b/src/Functions/formatReadableQuantity.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/src/Functions/formatReadableSize.cpp b/src/Functions/formatReadableSize.cpp index 22505907fa7..5c11603e9d7 100644 --- a/src/Functions/formatReadableSize.cpp +++ b/src/Functions/formatReadableSize.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp index db101486de8..4e616ada697 100644 --- a/src/Functions/randDistribution.cpp +++ b/src/Functions/randDistribution.cpp @@ -1,7 +1,8 @@ #include #include #include -#include "Common/Exception.h" +#include +#include #include #include #include diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index d13cf5db2f7..d53f3bc325d 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 17f3d3d4151..bba0b694d23 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -41,6 +41,7 @@ #include #include +#include #include static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB; diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 15a80667cc4..73d93514db5 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -7,6 +7,7 @@ #include #include +#include namespace DB { diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 89ea3a326cc..82403a4f96f 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/src/Interpreters/HashJoin.cpp 
b/src/Interpreters/HashJoin.cpp index 0d7c40cc27d..2aae82cd6de 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 541f9c6ee89..b2fd43c178c 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -1,3 +1,4 @@ +#include #include #include diff --git a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp index 7148996cc1d..784aa5494ba 100644 --- a/src/Processors/Formats/Impl/MySQLOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLOutputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 82e761f43e2..6d8f1ab55cb 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #if USE_SNAPPY #include diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 572a5204df7..62275f37857 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -3,6 +3,7 @@ #include #include +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 4e9f7b7601a..a39a0db1311 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index ecf8163a9d9..47d2c2c5cc6 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 0c7c50a687b..6c1377505d5 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index a97104a5a68..8fe2a161dba 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 0dc8d8d897b..b0c1160429a 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index d38d3486410..d6b6f5af61c 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -32,12 +32,13 @@ #include #include #include +#include +#include #include #include #include #include -#include #include #include #include From f1e67efa938c7afe567cbf6ed9ea070d8699751d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 09:28:24 +0100 Subject: [PATCH 26/88] Define _DARWIN_C_SOURCE for darwin (for 
MAP_ANON/MAP_ANONYMOUS)

Signed-off-by: Azat Khuzhin
---
 cmake/target.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cmake/target.cmake b/cmake/target.cmake
index 0d6993142b3..fb911ace7b5 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -12,6 +12,8 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
 elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
     set (OS_DARWIN 1)
     add_definitions(-D OS_DARWIN)
+    # For MAP_ANON/MAP_ANONYMOUS
+    add_definitions(-D _DARWIN_C_SOURCE)
 elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS")
     set (OS_SUNOS 1)
     add_definitions(-D OS_SUNOS)

From d4df5a2e5ba060461634125fad9b69249a8747ff Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sun, 24 Dec 2023 09:31:10 +0100
Subject: [PATCH 27/88] Remove workaround for old enough MacOSX

- this unit is not even used (only in tests)
- MacOSX is cross-compiled

Signed-off-by: Azat Khuzhin
---
 src/Common/ArrayCache.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/Common/ArrayCache.h b/src/Common/ArrayCache.h
index b6dde039227..cb15759e1ba 100644
--- a/src/Common/ArrayCache.h
+++ b/src/Common/ArrayCache.h
@@ -19,11 +19,6 @@
 #include
 #include
 
-/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
 namespace DB
 {

From a1ddfc909e3cd13542ab0a5e44e08758ab8e526d Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sun, 24 Dec 2023 19:19:49 +0100
Subject: [PATCH 28/88] Do not include PODArray.h into NamePrompter.h

Signed-off-by: Azat Khuzhin
---
 src/Common/NamePrompter.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Common/NamePrompter.h b/src/Common/NamePrompter.h
index 97c345414bb..cc72554657f 100644
--- a/src/Common/NamePrompter.h
+++ b/src/Common/NamePrompter.h
@@ -1,7 +1,6 @@
 #pragma once
 
 #include
-#include <Common/PODArray.h>
 #include
 #include

From 964d074186837078aaa12cbf5d735849a26065d4 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sun, 24 Dec 2023 18:59:25 +0100
Subject: [PATCH 29/88] Remove extra includes from PODArray

Signed-off-by: Azat Khuzhin
---
 src/Common/PODArray.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h
index 77cecf694f3..35353229303 100644
--- a/src/Common/PODArray.h
+++ b/src/Common/PODArray.h
@@ -7,8 +7,6 @@
 #include
 #include
-
-#include
 #include
 #include

From b60109d43e43cdf82eca16d124527d3415f95f2e Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Mon, 24 Apr 2023 13:24:55 +0000
Subject: [PATCH 30/88] Better

---
 .../Transforms/CheckSortedTransform.cpp | 14 +++++++++--
 .../Transforms/CheckSortedTransform.h   |  5 +++-
 src/Storages/MergeTree/MergeTask.cpp    | 24 +++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/src/Processors/Transforms/CheckSortedTransform.cpp b/src/Processors/Transforms/CheckSortedTransform.cpp
index 4491301e274..74579130fdf 100644
--- a/src/Processors/Transforms/CheckSortedTransform.cpp
+++ b/src/Processors/Transforms/CheckSortedTransform.cpp
@@ -42,20 +42,28 @@ void CheckSortedTransform::transform(Chunk & chunk)
         else if (res > 0)
         {
             throw Exception(ErrorCodes::LOGICAL_ERROR,
-                "Sort order of blocks violated for column number {}, left: {}, right: {}.",
+                "Sort order of blocks violated for column number {}, left: {}, right: {}. Chunk {}, rows read {}.{}",
                 column_number,
                 applyVisitor(FieldVisitorDump(), (*left_col)[left_index]),
-                applyVisitor(FieldVisitorDump(), (*right_col)[right_index]));
+                applyVisitor(FieldVisitorDump(), (*right_col)[right_index]),
+                chunk_num, rows_read,
+                description.empty() ?
String() : fmt::format(" ({})", description)); } } }; const auto & chunk_columns = chunk.getColumns(); + + ++rows_read; + if (!last_row.empty()) check(last_row, 0, chunk_columns, 0); for (size_t i = 1; i < num_rows; ++i) + { + ++rows_read; check(chunk_columns, i - 1, chunk_columns, i); + } last_row.clear(); for (const auto & chunk_column : chunk_columns) @@ -64,6 +72,8 @@ void CheckSortedTransform::transform(Chunk & chunk) column->insertFrom(*chunk_column, num_rows - 1); last_row.emplace_back(std::move(column)); } + + ++chunk_num; } } diff --git a/src/Processors/Transforms/CheckSortedTransform.h b/src/Processors/Transforms/CheckSortedTransform.h index 4daaaf79fdf..07005bdeacd 100644 --- a/src/Processors/Transforms/CheckSortedTransform.h +++ b/src/Processors/Transforms/CheckSortedTransform.h @@ -13,7 +13,7 @@ public: CheckSortedTransform(const Block & header, const SortDescription & sort_description); String getName() const override { return "CheckSortedTransform"; } - + void setDescription(const String & str) { description = str; } protected: void transform(Chunk & chunk) override; @@ -21,5 +21,8 @@ protected: private: SortDescriptionWithPositions sort_description_map; Columns last_row; + String description; + size_t chunk_num = 0; + size_t rows_read = 0; }; } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 8b5e9ba96ee..e51d08dbf1c 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -957,6 +958,20 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(sort_columns[i], 1, 1); + if (!sort_description.empty()) + { + for (size_t i = 0; i < pipes.size(); ++i) + { + auto & pipe = pipes[i]; + pipe.addSimpleTransform([&](const Block & header_) + { + auto transform = std::make_shared(header_, sort_description); + transform->setDescription(global_ctx->future_part->parts[i]->name); + return transform; + }); + } + } + /// The order of the streams is important: when the key is matched, the elements go in the order of the source stream number. /// In the merged part, the lines with the same key must be in the ascending order of the identifier of original part, /// that is going in insertion order. 
@@ -1023,6 +1038,15 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() auto res_pipe = Pipe::unitePipes(std::move(pipes)); res_pipe.addTransform(std::move(merged_transform)); + if (!sort_description.empty()) + { + res_pipe.addSimpleTransform([&](const Block & header_) + { + auto transform = std::make_shared(header_, sort_description); + return transform; + }); + } + if (global_ctx->deduplicate) { /// We don't want to deduplicate by block number column From 3dbd3b3e618e19f32a45ae8d1577f606ac48cdff Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 24 Apr 2023 13:26:56 +0000 Subject: [PATCH 31/88] Better --- src/Storages/MergeTree/MergeTask.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index e51d08dbf1c..a8b657d0e3e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -958,6 +958,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(sort_columns[i], 1, 1); +#ifndef NDEBUG if (!sort_description.empty()) { for (size_t i = 0; i < pipes.size(); ++i) @@ -971,6 +972,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() }); } } +#endif /// The order of the streams is important: when the key is matched, the elements go in the order of the source stream number. /// In the merged part, the lines with the same key must be in the ascending order of the identifier of original part, @@ -1038,6 +1040,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() auto res_pipe = Pipe::unitePipes(std::move(pipes)); res_pipe.addTransform(std::move(merged_transform)); +#ifndef NDEBUG if (!sort_description.empty()) { res_pipe.addSimpleTransform([&](const Block & header_) @@ -1046,6 +1049,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() return transform; }); } +#endif if (global_ctx->deduplicate) { From ac87daf8c3eacfbfe877ee1634d096915f7069f3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 27 Dec 2023 15:47:37 +0000 Subject: [PATCH 32/88] Fix bug in TTL GROUP BY (cherry picked from commit d9f2a2935c1c6760ef2725f2eb6d13ed9f0815fe) --- .../TTL/TTLAggregationAlgorithm.cpp | 16 +- .../02949_ttl_group_by_bug.reference | 709 ++++++++++++++++++ .../0_stateless/02949_ttl_group_by_bug.sql | 32 + 3 files changed, 754 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02949_ttl_group_by_bug.reference create mode 100644 tests/queries/0_stateless/02949_ttl_group_by_bug.sql diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp index 222b2d3df3e..72cd973aa06 100644 --- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -30,8 +30,8 @@ TTLAggregationAlgorithm::TTLAggregationAlgorithm( false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, - 0, - 0, + /*group_by_two_level_threshold*/0, + /*group_by_two_level_threshold_bytes*/0, settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, storage_.getContext()->getTempDataOnDisk(), @@ -100,7 +100,17 @@ void TTLAggregationAlgorithm::execute(Block & block) } } - if (!same_as_current) + /// We are observing the row with new aggregation key. + /// In this case we definitely need to finish the current aggregation for the previuos key + /// write results to `result_columns`. 
+ const bool observing_new_key = !same_as_current; + /// We are observing the row with the same aggregation key, but TTL is not expired anymore. + /// In this case we need to finish aggregation here. The current row has to be written as is. + const bool no_new_rows_to_aggregate_within_the_same_key = same_as_current && !ttl_expired; + /// The aggregation for this aggregation key is done. + const bool need_to_flush_aggregation_state = observing_new_key || no_new_rows_to_aggregate_within_the_same_key; + + if (need_to_flush_aggregation_state) { if (rows_with_current_key) { diff --git a/tests/queries/0_stateless/02949_ttl_group_by_bug.reference b/tests/queries/0_stateless/02949_ttl_group_by_bug.reference new file mode 100644 index 00000000000..9782d63118b --- /dev/null +++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.reference @@ -0,0 +1,709 @@ +0 2023-12-27 15:36:00 0 0 0 +0 2023-12-27 15:39:00 0 0 0 +0 2023-12-27 15:41:54 0 0 0 +0 2023-12-27 15:41:59 0 0 0 +0 2023-12-27 15:42:04 0 0 0 +0 2023-12-27 15:42:09 0 0 0 +0 2023-12-27 15:42:14 0 0 0 +0 2023-12-27 15:42:19 0 0 0 +0 2023-12-27 15:42:24 0 0 0 +0 2023-12-27 15:42:29 0 0 0 +0 2023-12-27 15:42:34 0 0 0 +0 2023-12-27 15:42:39 0 0 0 +0 2023-12-27 15:42:44 0 0 0 +0 2023-12-27 15:42:49 0 0 0 +0 2023-12-27 15:42:54 0 0 0 +0 2023-12-27 15:42:59 0 0 0 +0 2023-12-27 15:43:04 0 0 0 +0 2023-12-27 15:43:09 0 0 0 +0 2023-12-27 15:43:14 0 0 0 +0 2023-12-27 15:43:19 0 0 0 +0 2023-12-27 15:43:24 0 0 0 +0 2023-12-27 15:43:29 0 0 0 +0 2023-12-27 15:43:34 0 0 0 +0 2023-12-27 15:43:39 0 0 0 +0 2023-12-27 15:43:44 0 0 0 +0 2023-12-27 15:43:49 0 0 0 +0 2023-12-27 15:43:54 0 0 0 +0 2023-12-27 15:43:59 0 0 0 +0 2023-12-27 15:44:04 0 0 0 +0 2023-12-27 15:44:09 0 0 0 +0 2023-12-27 15:44:14 0 0 0 +0 2023-12-27 15:44:19 0 0 0 +0 2023-12-27 15:44:24 0 0 0 +0 2023-12-27 15:44:29 0 0 0 +0 2023-12-27 15:44:34 0 0 0 +0 2023-12-27 15:44:39 0 0 0 +0 2023-12-27 15:44:44 0 0 0 +0 2023-12-27 15:44:49 0 0 0 +0 2023-12-27 15:44:54 0 0 0 +0 2023-12-27 15:44:59 0 0 0 +0 2023-12-27 15:45:04 0 0 0 +0 2023-12-27 15:45:09 0 0 0 +0 2023-12-27 15:45:14 0 0 0 +0 2023-12-27 15:45:19 0 0 0 +0 2023-12-27 15:45:24 0 0 0 +0 2023-12-27 15:45:29 0 0 0 +0 2023-12-27 15:45:34 0 0 0 +0 2023-12-27 15:45:39 0 0 0 +0 2023-12-27 15:45:44 0 0 0 +0 2023-12-27 15:45:49 0 0 0 +0 2023-12-27 15:45:54 0 0 0 +0 2023-12-27 15:45:59 0 0 0 +0 2023-12-27 15:46:04 0 0 0 +0 2023-12-27 15:46:09 0 0 0 +0 2023-12-27 15:46:14 0 0 0 +0 2023-12-27 15:46:19 0 0 0 +0 2023-12-27 15:46:24 0 0 0 +0 2023-12-27 15:46:29 0 0 0 +0 2023-12-27 15:46:34 0 0 0 +0 2023-12-27 15:46:39 0 0 0 +0 2023-12-27 15:46:44 0 0 0 +0 2023-12-27 15:46:49 0 0 0 +0 2023-12-27 15:46:54 0 0 0 +0 2023-12-27 15:46:59 0 0 0 +0 2023-12-27 15:47:04 0 0 0 +0 2023-12-27 15:47:09 0 0 0 +0 2023-12-27 15:47:14 0 0 0 +0 2023-12-27 15:47:19 0 0 0 +0 2023-12-27 15:47:24 0 0 0 +0 2023-12-27 15:47:29 0 0 0 +0 2023-12-27 15:47:34 0 0 0 +0 2023-12-27 15:47:39 0 0 0 +0 2023-12-27 15:47:44 0 0 0 +0 2023-12-27 15:47:49 0 0 0 +0 2023-12-27 15:47:54 0 0 0 +0 2023-12-27 15:47:59 0 0 0 +0 2023-12-27 15:48:04 0 0 0 +0 2023-12-27 15:48:09 0 0 0 +0 2023-12-27 15:48:14 0 0 0 +0 2023-12-27 15:48:19 0 0 0 +0 2023-12-27 15:48:24 0 0 0 +0 2023-12-27 15:48:29 0 0 0 +0 2023-12-27 15:48:34 0 0 0 +0 2023-12-27 15:48:39 0 0 0 +0 2023-12-27 15:48:44 0 0 0 +0 2023-12-27 15:48:49 0 0 0 +0 2023-12-27 15:48:54 0 0 0 +0 2023-12-27 15:48:59 0 0 0 +0 2023-12-27 15:49:04 0 0 0 +0 2023-12-27 15:49:09 0 0 0 +0 2023-12-27 15:49:14 0 0 0 +0 2023-12-27 15:49:19 0 0 0 +0 2023-12-27 15:49:24 0 0 0 
+0 2023-12-27 15:49:29 0 0 0 +0 2023-12-27 15:49:34 0 0 0 +0 2023-12-27 15:49:39 0 0 0 +0 2023-12-27 15:49:44 0 0 0 +0 2023-12-27 15:49:49 0 0 0 +0 2023-12-27 15:49:54 0 0 0 +0 2023-12-27 15:49:59 0 0 0 +0 2023-12-27 15:50:04 0 0 0 +0 2023-12-27 15:50:09 0 0 0 +0 2023-12-27 15:50:14 0 0 0 +0 2023-12-27 15:50:19 0 0 0 +0 2023-12-27 15:50:24 0 0 0 +0 2023-12-27 15:50:29 0 0 0 +0 2023-12-27 15:50:34 0 0 0 +0 2023-12-27 15:50:39 0 0 0 +0 2023-12-27 15:50:44 0 0 0 +0 2023-12-27 15:50:49 0 0 0 +0 2023-12-27 15:50:54 0 0 0 +0 2023-12-27 15:50:59 0 0 0 +0 2023-12-27 15:51:04 0 0 0 +0 2023-12-27 15:51:09 0 0 0 +0 2023-12-27 15:51:14 0 0 0 +0 2023-12-27 15:51:19 0 0 0 +0 2023-12-27 15:51:24 0 0 0 +0 2023-12-27 15:51:29 0 0 0 +0 2023-12-27 15:51:34 0 0 0 +0 2023-12-27 15:51:39 0 0 0 +0 2023-12-27 15:51:44 0 0 0 +0 2023-12-27 15:51:49 0 0 0 +0 2023-12-27 15:51:54 0 0 0 +0 2023-12-27 15:51:59 0 0 0 +0 2023-12-27 15:52:04 0 0 0 +0 2023-12-27 15:52:09 0 0 0 +0 2023-12-27 15:52:14 0 0 0 +0 2023-12-27 15:52:19 0 0 0 +0 2023-12-27 15:52:24 0 0 0 +0 2023-12-27 15:52:29 0 0 0 +0 2023-12-27 15:52:34 0 0 0 +0 2023-12-27 15:52:39 0 0 0 +0 2023-12-27 15:52:44 0 0 0 +0 2023-12-27 15:52:49 0 0 0 +0 2023-12-27 15:52:54 0 0 0 +0 2023-12-27 15:52:59 0 0 0 +0 2023-12-27 15:53:04 0 0 0 +0 2023-12-27 15:53:09 0 0 0 +0 2023-12-27 15:53:14 0 0 0 +0 2023-12-27 15:53:19 0 0 0 +0 2023-12-27 15:53:24 0 0 0 +1 2023-12-27 15:36:00 0 0 0 +1 2023-12-27 15:39:00 0 0 0 +1 2023-12-27 15:41:50 0 0 0 +1 2023-12-27 15:41:55 0 0 0 +1 2023-12-27 15:42:00 0 0 0 +1 2023-12-27 15:42:05 0 0 0 +1 2023-12-27 15:42:10 0 0 0 +1 2023-12-27 15:42:15 0 0 0 +1 2023-12-27 15:42:20 0 0 0 +1 2023-12-27 15:42:25 0 0 0 +1 2023-12-27 15:42:30 0 0 0 +1 2023-12-27 15:42:35 0 0 0 +1 2023-12-27 15:42:40 0 0 0 +1 2023-12-27 15:42:45 0 0 0 +1 2023-12-27 15:42:50 0 0 0 +1 2023-12-27 15:42:55 0 0 0 +1 2023-12-27 15:43:00 0 0 0 +1 2023-12-27 15:43:05 0 0 0 +1 2023-12-27 15:43:10 0 0 0 +1 2023-12-27 15:43:15 0 0 0 +1 2023-12-27 15:43:20 0 0 0 +1 2023-12-27 15:43:25 0 0 0 +1 2023-12-27 15:43:30 0 0 0 +1 2023-12-27 15:43:35 0 0 0 +1 2023-12-27 15:43:40 0 0 0 +1 2023-12-27 15:43:45 0 0 0 +1 2023-12-27 15:43:50 0 0 0 +1 2023-12-27 15:43:55 0 0 0 +1 2023-12-27 15:44:00 0 0 0 +1 2023-12-27 15:44:05 0 0 0 +1 2023-12-27 15:44:10 0 0 0 +1 2023-12-27 15:44:15 0 0 0 +1 2023-12-27 15:44:20 0 0 0 +1 2023-12-27 15:44:25 0 0 0 +1 2023-12-27 15:44:30 0 0 0 +1 2023-12-27 15:44:35 0 0 0 +1 2023-12-27 15:44:40 0 0 0 +1 2023-12-27 15:44:45 0 0 0 +1 2023-12-27 15:44:50 0 0 0 +1 2023-12-27 15:44:55 0 0 0 +1 2023-12-27 15:45:00 0 0 0 +1 2023-12-27 15:45:05 0 0 0 +1 2023-12-27 15:45:10 0 0 0 +1 2023-12-27 15:45:15 0 0 0 +1 2023-12-27 15:45:20 0 0 0 +1 2023-12-27 15:45:25 0 0 0 +1 2023-12-27 15:45:30 0 0 0 +1 2023-12-27 15:45:35 0 0 0 +1 2023-12-27 15:45:40 0 0 0 +1 2023-12-27 15:45:45 0 0 0 +1 2023-12-27 15:45:50 0 0 0 +1 2023-12-27 15:45:55 0 0 0 +1 2023-12-27 15:46:00 0 0 0 +1 2023-12-27 15:46:05 0 0 0 +1 2023-12-27 15:46:10 0 0 0 +1 2023-12-27 15:46:15 0 0 0 +1 2023-12-27 15:46:20 0 0 0 +1 2023-12-27 15:46:25 0 0 0 +1 2023-12-27 15:46:30 0 0 0 +1 2023-12-27 15:46:35 0 0 0 +1 2023-12-27 15:46:40 0 0 0 +1 2023-12-27 15:46:45 0 0 0 +1 2023-12-27 15:46:50 0 0 0 +1 2023-12-27 15:46:55 0 0 0 +1 2023-12-27 15:47:00 0 0 0 +1 2023-12-27 15:47:05 0 0 0 +1 2023-12-27 15:47:10 0 0 0 +1 2023-12-27 15:47:15 0 0 0 +1 2023-12-27 15:47:20 0 0 0 +1 2023-12-27 15:47:25 0 0 0 +1 2023-12-27 15:47:30 0 0 0 +1 2023-12-27 15:47:35 0 0 0 +1 2023-12-27 15:47:40 0 0 0 +1 2023-12-27 15:47:45 0 0 0 +1 2023-12-27 
15:47:50 0 0 0 +1 2023-12-27 15:47:55 0 0 0 +1 2023-12-27 15:48:00 0 0 0 +1 2023-12-27 15:48:05 0 0 0 +1 2023-12-27 15:48:10 0 0 0 +1 2023-12-27 15:48:15 0 0 0 +1 2023-12-27 15:48:20 0 0 0 +1 2023-12-27 15:48:25 0 0 0 +1 2023-12-27 15:48:30 0 0 0 +1 2023-12-27 15:48:35 0 0 0 +1 2023-12-27 15:48:40 0 0 0 +1 2023-12-27 15:48:45 0 0 0 +1 2023-12-27 15:48:50 0 0 0 +1 2023-12-27 15:48:55 0 0 0 +1 2023-12-27 15:49:00 0 0 0 +1 2023-12-27 15:49:05 0 0 0 +1 2023-12-27 15:49:10 0 0 0 +1 2023-12-27 15:49:15 0 0 0 +1 2023-12-27 15:49:20 0 0 0 +1 2023-12-27 15:49:25 0 0 0 +1 2023-12-27 15:49:30 0 0 0 +1 2023-12-27 15:49:35 0 0 0 +1 2023-12-27 15:49:40 0 0 0 +1 2023-12-27 15:49:45 0 0 0 +1 2023-12-27 15:49:50 0 0 0 +1 2023-12-27 15:49:55 0 0 0 +1 2023-12-27 15:50:00 0 0 0 +1 2023-12-27 15:50:05 0 0 0 +1 2023-12-27 15:50:10 0 0 0 +1 2023-12-27 15:50:15 0 0 0 +1 2023-12-27 15:50:20 0 0 0 +1 2023-12-27 15:50:25 0 0 0 +1 2023-12-27 15:50:30 0 0 0 +1 2023-12-27 15:50:35 0 0 0 +1 2023-12-27 15:50:40 0 0 0 +1 2023-12-27 15:50:45 0 0 0 +1 2023-12-27 15:50:50 0 0 0 +1 2023-12-27 15:50:55 0 0 0 +1 2023-12-27 15:51:00 0 0 0 +1 2023-12-27 15:51:05 0 0 0 +1 2023-12-27 15:51:10 0 0 0 +1 2023-12-27 15:51:15 0 0 0 +1 2023-12-27 15:51:20 0 0 0 +1 2023-12-27 15:51:25 0 0 0 +1 2023-12-27 15:51:30 0 0 0 +1 2023-12-27 15:51:35 0 0 0 +1 2023-12-27 15:51:40 0 0 0 +1 2023-12-27 15:51:45 0 0 0 +1 2023-12-27 15:51:50 0 0 0 +1 2023-12-27 15:51:55 0 0 0 +1 2023-12-27 15:52:00 0 0 0 +1 2023-12-27 15:52:05 0 0 0 +1 2023-12-27 15:52:10 0 0 0 +1 2023-12-27 15:52:15 0 0 0 +1 2023-12-27 15:52:20 0 0 0 +1 2023-12-27 15:52:25 0 0 0 +1 2023-12-27 15:52:30 0 0 0 +1 2023-12-27 15:52:35 0 0 0 +1 2023-12-27 15:52:40 0 0 0 +1 2023-12-27 15:52:45 0 0 0 +1 2023-12-27 15:52:50 0 0 0 +1 2023-12-27 15:52:55 0 0 0 +1 2023-12-27 15:53:00 0 0 0 +1 2023-12-27 15:53:05 0 0 0 +1 2023-12-27 15:53:10 0 0 0 +1 2023-12-27 15:53:15 0 0 0 +1 2023-12-27 15:53:20 0 0 0 +1 2023-12-27 15:53:25 0 0 0 +2 2023-12-27 15:36:00 0 0 0 +2 2023-12-27 15:39:00 0 0 0 +2 2023-12-27 15:41:51 0 0 0 +2 2023-12-27 15:41:56 0 0 0 +2 2023-12-27 15:42:01 0 0 0 +2 2023-12-27 15:42:06 0 0 0 +2 2023-12-27 15:42:11 0 0 0 +2 2023-12-27 15:42:16 0 0 0 +2 2023-12-27 15:42:21 0 0 0 +2 2023-12-27 15:42:26 0 0 0 +2 2023-12-27 15:42:31 0 0 0 +2 2023-12-27 15:42:36 0 0 0 +2 2023-12-27 15:42:41 0 0 0 +2 2023-12-27 15:42:46 0 0 0 +2 2023-12-27 15:42:51 0 0 0 +2 2023-12-27 15:42:56 0 0 0 +2 2023-12-27 15:43:01 0 0 0 +2 2023-12-27 15:43:06 0 0 0 +2 2023-12-27 15:43:11 0 0 0 +2 2023-12-27 15:43:16 0 0 0 +2 2023-12-27 15:43:21 0 0 0 +2 2023-12-27 15:43:26 0 0 0 +2 2023-12-27 15:43:31 0 0 0 +2 2023-12-27 15:43:36 0 0 0 +2 2023-12-27 15:43:41 0 0 0 +2 2023-12-27 15:43:46 0 0 0 +2 2023-12-27 15:43:51 0 0 0 +2 2023-12-27 15:43:56 0 0 0 +2 2023-12-27 15:44:01 0 0 0 +2 2023-12-27 15:44:06 0 0 0 +2 2023-12-27 15:44:11 0 0 0 +2 2023-12-27 15:44:16 0 0 0 +2 2023-12-27 15:44:21 0 0 0 +2 2023-12-27 15:44:26 0 0 0 +2 2023-12-27 15:44:31 0 0 0 +2 2023-12-27 15:44:36 0 0 0 +2 2023-12-27 15:44:41 0 0 0 +2 2023-12-27 15:44:46 0 0 0 +2 2023-12-27 15:44:51 0 0 0 +2 2023-12-27 15:44:56 0 0 0 +2 2023-12-27 15:45:01 0 0 0 +2 2023-12-27 15:45:06 0 0 0 +2 2023-12-27 15:45:11 0 0 0 +2 2023-12-27 15:45:16 0 0 0 +2 2023-12-27 15:45:21 0 0 0 +2 2023-12-27 15:45:26 0 0 0 +2 2023-12-27 15:45:31 0 0 0 +2 2023-12-27 15:45:36 0 0 0 +2 2023-12-27 15:45:41 0 0 0 +2 2023-12-27 15:45:46 0 0 0 +2 2023-12-27 15:45:51 0 0 0 +2 2023-12-27 15:45:56 0 0 0 +2 2023-12-27 15:46:01 0 0 0 +2 2023-12-27 15:46:06 0 0 0 +2 2023-12-27 15:46:11 0 0 0 
+2 2023-12-27 15:46:16 0 0 0 +2 2023-12-27 15:46:21 0 0 0 +2 2023-12-27 15:46:26 0 0 0 +2 2023-12-27 15:46:31 0 0 0 +2 2023-12-27 15:46:36 0 0 0 +2 2023-12-27 15:46:41 0 0 0 +2 2023-12-27 15:46:46 0 0 0 +2 2023-12-27 15:46:51 0 0 0 +2 2023-12-27 15:46:56 0 0 0 +2 2023-12-27 15:47:01 0 0 0 +2 2023-12-27 15:47:06 0 0 0 +2 2023-12-27 15:47:11 0 0 0 +2 2023-12-27 15:47:16 0 0 0 +2 2023-12-27 15:47:21 0 0 0 +2 2023-12-27 15:47:26 0 0 0 +2 2023-12-27 15:47:31 0 0 0 +2 2023-12-27 15:47:36 0 0 0 +2 2023-12-27 15:47:41 0 0 0 +2 2023-12-27 15:47:46 0 0 0 +2 2023-12-27 15:47:51 0 0 0 +2 2023-12-27 15:47:56 0 0 0 +2 2023-12-27 15:48:01 0 0 0 +2 2023-12-27 15:48:06 0 0 0 +2 2023-12-27 15:48:11 0 0 0 +2 2023-12-27 15:48:16 0 0 0 +2 2023-12-27 15:48:21 0 0 0 +2 2023-12-27 15:48:26 0 0 0 +2 2023-12-27 15:48:31 0 0 0 +2 2023-12-27 15:48:36 0 0 0 +2 2023-12-27 15:48:41 0 0 0 +2 2023-12-27 15:48:46 0 0 0 +2 2023-12-27 15:48:51 0 0 0 +2 2023-12-27 15:48:56 0 0 0 +2 2023-12-27 15:49:01 0 0 0 +2 2023-12-27 15:49:06 0 0 0 +2 2023-12-27 15:49:11 0 0 0 +2 2023-12-27 15:49:16 0 0 0 +2 2023-12-27 15:49:21 0 0 0 +2 2023-12-27 15:49:26 0 0 0 +2 2023-12-27 15:49:31 0 0 0 +2 2023-12-27 15:49:36 0 0 0 +2 2023-12-27 15:49:41 0 0 0 +2 2023-12-27 15:49:46 0 0 0 +2 2023-12-27 15:49:51 0 0 0 +2 2023-12-27 15:49:56 0 0 0 +2 2023-12-27 15:50:01 0 0 0 +2 2023-12-27 15:50:06 0 0 0 +2 2023-12-27 15:50:11 0 0 0 +2 2023-12-27 15:50:16 0 0 0 +2 2023-12-27 15:50:21 0 0 0 +2 2023-12-27 15:50:26 0 0 0 +2 2023-12-27 15:50:31 0 0 0 +2 2023-12-27 15:50:36 0 0 0 +2 2023-12-27 15:50:41 0 0 0 +2 2023-12-27 15:50:46 0 0 0 +2 2023-12-27 15:50:51 0 0 0 +2 2023-12-27 15:50:56 0 0 0 +2 2023-12-27 15:51:01 0 0 0 +2 2023-12-27 15:51:06 0 0 0 +2 2023-12-27 15:51:11 0 0 0 +2 2023-12-27 15:51:16 0 0 0 +2 2023-12-27 15:51:21 0 0 0 +2 2023-12-27 15:51:26 0 0 0 +2 2023-12-27 15:51:31 0 0 0 +2 2023-12-27 15:51:36 0 0 0 +2 2023-12-27 15:51:41 0 0 0 +2 2023-12-27 15:51:46 0 0 0 +2 2023-12-27 15:51:51 0 0 0 +2 2023-12-27 15:51:56 0 0 0 +2 2023-12-27 15:52:01 0 0 0 +2 2023-12-27 15:52:06 0 0 0 +2 2023-12-27 15:52:11 0 0 0 +2 2023-12-27 15:52:16 0 0 0 +2 2023-12-27 15:52:21 0 0 0 +2 2023-12-27 15:52:26 0 0 0 +2 2023-12-27 15:52:31 0 0 0 +2 2023-12-27 15:52:36 0 0 0 +2 2023-12-27 15:52:41 0 0 0 +2 2023-12-27 15:52:46 0 0 0 +2 2023-12-27 15:52:51 0 0 0 +2 2023-12-27 15:52:56 0 0 0 +2 2023-12-27 15:53:01 0 0 0 +2 2023-12-27 15:53:06 0 0 0 +2 2023-12-27 15:53:11 0 0 0 +2 2023-12-27 15:53:16 0 0 0 +2 2023-12-27 15:53:21 0 0 0 +2 2023-12-27 15:53:26 0 0 0 +3 2023-12-27 15:36:00 0 0 0 +3 2023-12-27 15:39:00 0 0 0 +3 2023-12-27 15:41:52 0 0 0 +3 2023-12-27 15:41:57 0 0 0 +3 2023-12-27 15:42:02 0 0 0 +3 2023-12-27 15:42:07 0 0 0 +3 2023-12-27 15:42:12 0 0 0 +3 2023-12-27 15:42:17 0 0 0 +3 2023-12-27 15:42:22 0 0 0 +3 2023-12-27 15:42:27 0 0 0 +3 2023-12-27 15:42:32 0 0 0 +3 2023-12-27 15:42:37 0 0 0 +3 2023-12-27 15:42:42 0 0 0 +3 2023-12-27 15:42:47 0 0 0 +3 2023-12-27 15:42:52 0 0 0 +3 2023-12-27 15:42:57 0 0 0 +3 2023-12-27 15:43:02 0 0 0 +3 2023-12-27 15:43:07 0 0 0 +3 2023-12-27 15:43:12 0 0 0 +3 2023-12-27 15:43:17 0 0 0 +3 2023-12-27 15:43:22 0 0 0 +3 2023-12-27 15:43:27 0 0 0 +3 2023-12-27 15:43:32 0 0 0 +3 2023-12-27 15:43:37 0 0 0 +3 2023-12-27 15:43:42 0 0 0 +3 2023-12-27 15:43:47 0 0 0 +3 2023-12-27 15:43:52 0 0 0 +3 2023-12-27 15:43:57 0 0 0 +3 2023-12-27 15:44:02 0 0 0 +3 2023-12-27 15:44:07 0 0 0 +3 2023-12-27 15:44:12 0 0 0 +3 2023-12-27 15:44:17 0 0 0 +3 2023-12-27 15:44:22 0 0 0 +3 2023-12-27 15:44:27 0 0 0 +3 2023-12-27 15:44:32 0 0 0 +3 2023-12-27 
15:44:37 0 0 0 +3 2023-12-27 15:44:42 0 0 0 +3 2023-12-27 15:44:47 0 0 0 +3 2023-12-27 15:44:52 0 0 0 +3 2023-12-27 15:44:57 0 0 0 +3 2023-12-27 15:45:02 0 0 0 +3 2023-12-27 15:45:07 0 0 0 +3 2023-12-27 15:45:12 0 0 0 +3 2023-12-27 15:45:17 0 0 0 +3 2023-12-27 15:45:22 0 0 0 +3 2023-12-27 15:45:27 0 0 0 +3 2023-12-27 15:45:32 0 0 0 +3 2023-12-27 15:45:37 0 0 0 +3 2023-12-27 15:45:42 0 0 0 +3 2023-12-27 15:45:47 0 0 0 +3 2023-12-27 15:45:52 0 0 0 +3 2023-12-27 15:45:57 0 0 0 +3 2023-12-27 15:46:02 0 0 0 +3 2023-12-27 15:46:07 0 0 0 +3 2023-12-27 15:46:12 0 0 0 +3 2023-12-27 15:46:17 0 0 0 +3 2023-12-27 15:46:22 0 0 0 +3 2023-12-27 15:46:27 0 0 0 +3 2023-12-27 15:46:32 0 0 0 +3 2023-12-27 15:46:37 0 0 0 +3 2023-12-27 15:46:42 0 0 0 +3 2023-12-27 15:46:47 0 0 0 +3 2023-12-27 15:46:52 0 0 0 +3 2023-12-27 15:46:57 0 0 0 +3 2023-12-27 15:47:02 0 0 0 +3 2023-12-27 15:47:07 0 0 0 +3 2023-12-27 15:47:12 0 0 0 +3 2023-12-27 15:47:17 0 0 0 +3 2023-12-27 15:47:22 0 0 0 +3 2023-12-27 15:47:27 0 0 0 +3 2023-12-27 15:47:32 0 0 0 +3 2023-12-27 15:47:37 0 0 0 +3 2023-12-27 15:47:42 0 0 0 +3 2023-12-27 15:47:47 0 0 0 +3 2023-12-27 15:47:52 0 0 0 +3 2023-12-27 15:47:57 0 0 0 +3 2023-12-27 15:48:02 0 0 0 +3 2023-12-27 15:48:07 0 0 0 +3 2023-12-27 15:48:12 0 0 0 +3 2023-12-27 15:48:17 0 0 0 +3 2023-12-27 15:48:22 0 0 0 +3 2023-12-27 15:48:27 0 0 0 +3 2023-12-27 15:48:32 0 0 0 +3 2023-12-27 15:48:37 0 0 0 +3 2023-12-27 15:48:42 0 0 0 +3 2023-12-27 15:48:47 0 0 0 +3 2023-12-27 15:48:52 0 0 0 +3 2023-12-27 15:48:57 0 0 0 +3 2023-12-27 15:49:02 0 0 0 +3 2023-12-27 15:49:07 0 0 0 +3 2023-12-27 15:49:12 0 0 0 +3 2023-12-27 15:49:17 0 0 0 +3 2023-12-27 15:49:22 0 0 0 +3 2023-12-27 15:49:27 0 0 0 +3 2023-12-27 15:49:32 0 0 0 +3 2023-12-27 15:49:37 0 0 0 +3 2023-12-27 15:49:42 0 0 0 +3 2023-12-27 15:49:47 0 0 0 +3 2023-12-27 15:49:52 0 0 0 +3 2023-12-27 15:49:57 0 0 0 +3 2023-12-27 15:50:02 0 0 0 +3 2023-12-27 15:50:07 0 0 0 +3 2023-12-27 15:50:12 0 0 0 +3 2023-12-27 15:50:17 0 0 0 +3 2023-12-27 15:50:22 0 0 0 +3 2023-12-27 15:50:27 0 0 0 +3 2023-12-27 15:50:32 0 0 0 +3 2023-12-27 15:50:37 0 0 0 +3 2023-12-27 15:50:42 0 0 0 +3 2023-12-27 15:50:47 0 0 0 +3 2023-12-27 15:50:52 0 0 0 +3 2023-12-27 15:50:57 0 0 0 +3 2023-12-27 15:51:02 0 0 0 +3 2023-12-27 15:51:07 0 0 0 +3 2023-12-27 15:51:12 0 0 0 +3 2023-12-27 15:51:17 0 0 0 +3 2023-12-27 15:51:22 0 0 0 +3 2023-12-27 15:51:27 0 0 0 +3 2023-12-27 15:51:32 0 0 0 +3 2023-12-27 15:51:37 0 0 0 +3 2023-12-27 15:51:42 0 0 0 +3 2023-12-27 15:51:47 0 0 0 +3 2023-12-27 15:51:52 0 0 0 +3 2023-12-27 15:51:57 0 0 0 +3 2023-12-27 15:52:02 0 0 0 +3 2023-12-27 15:52:07 0 0 0 +3 2023-12-27 15:52:12 0 0 0 +3 2023-12-27 15:52:17 0 0 0 +3 2023-12-27 15:52:22 0 0 0 +3 2023-12-27 15:52:27 0 0 0 +3 2023-12-27 15:52:32 0 0 0 +3 2023-12-27 15:52:37 0 0 0 +3 2023-12-27 15:52:42 0 0 0 +3 2023-12-27 15:52:47 0 0 0 +3 2023-12-27 15:52:52 0 0 0 +3 2023-12-27 15:52:57 0 0 0 +3 2023-12-27 15:53:02 0 0 0 +3 2023-12-27 15:53:07 0 0 0 +3 2023-12-27 15:53:12 0 0 0 +3 2023-12-27 15:53:17 0 0 0 +3 2023-12-27 15:53:22 0 0 0 +3 2023-12-27 15:53:27 0 0 0 +4 2023-12-27 15:36:00 0 0 0 +4 2023-12-27 15:39:00 0 0 0 +4 2023-12-27 15:41:53 0 0 0 +4 2023-12-27 15:41:58 0 0 0 +4 2023-12-27 15:42:03 0 0 0 +4 2023-12-27 15:42:08 0 0 0 +4 2023-12-27 15:42:13 0 0 0 +4 2023-12-27 15:42:18 0 0 0 +4 2023-12-27 15:42:23 0 0 0 +4 2023-12-27 15:42:28 0 0 0 +4 2023-12-27 15:42:33 0 0 0 +4 2023-12-27 15:42:38 0 0 0 +4 2023-12-27 15:42:43 0 0 0 +4 2023-12-27 15:42:48 0 0 0 +4 2023-12-27 15:42:53 0 0 0 +4 2023-12-27 15:42:58 0 0 0 
+4 2023-12-27 15:43:03 0 0 0 +4 2023-12-27 15:43:08 0 0 0 +4 2023-12-27 15:43:13 0 0 0 +4 2023-12-27 15:43:18 0 0 0 +4 2023-12-27 15:43:23 0 0 0 +4 2023-12-27 15:43:28 0 0 0 +4 2023-12-27 15:43:33 0 0 0 +4 2023-12-27 15:43:38 0 0 0 +4 2023-12-27 15:43:43 0 0 0 +4 2023-12-27 15:43:48 0 0 0 +4 2023-12-27 15:43:53 0 0 0 +4 2023-12-27 15:43:58 0 0 0 +4 2023-12-27 15:44:03 0 0 0 +4 2023-12-27 15:44:08 0 0 0 +4 2023-12-27 15:44:13 0 0 0 +4 2023-12-27 15:44:18 0 0 0 +4 2023-12-27 15:44:23 0 0 0 +4 2023-12-27 15:44:28 0 0 0 +4 2023-12-27 15:44:33 0 0 0 +4 2023-12-27 15:44:38 0 0 0 +4 2023-12-27 15:44:43 0 0 0 +4 2023-12-27 15:44:48 0 0 0 +4 2023-12-27 15:44:53 0 0 0 +4 2023-12-27 15:44:58 0 0 0 +4 2023-12-27 15:45:03 0 0 0 +4 2023-12-27 15:45:08 0 0 0 +4 2023-12-27 15:45:13 0 0 0 +4 2023-12-27 15:45:18 0 0 0 +4 2023-12-27 15:45:23 0 0 0 +4 2023-12-27 15:45:28 0 0 0 +4 2023-12-27 15:45:33 0 0 0 +4 2023-12-27 15:45:38 0 0 0 +4 2023-12-27 15:45:43 0 0 0 +4 2023-12-27 15:45:48 0 0 0 +4 2023-12-27 15:45:53 0 0 0 +4 2023-12-27 15:45:58 0 0 0 +4 2023-12-27 15:46:03 0 0 0 +4 2023-12-27 15:46:08 0 0 0 +4 2023-12-27 15:46:13 0 0 0 +4 2023-12-27 15:46:18 0 0 0 +4 2023-12-27 15:46:23 0 0 0 +4 2023-12-27 15:46:28 0 0 0 +4 2023-12-27 15:46:33 0 0 0 +4 2023-12-27 15:46:38 0 0 0 +4 2023-12-27 15:46:43 0 0 0 +4 2023-12-27 15:46:48 0 0 0 +4 2023-12-27 15:46:53 0 0 0 +4 2023-12-27 15:46:58 0 0 0 +4 2023-12-27 15:47:03 0 0 0 +4 2023-12-27 15:47:08 0 0 0 +4 2023-12-27 15:47:13 0 0 0 +4 2023-12-27 15:47:18 0 0 0 +4 2023-12-27 15:47:23 0 0 0 +4 2023-12-27 15:47:28 0 0 0 +4 2023-12-27 15:47:33 0 0 0 +4 2023-12-27 15:47:38 0 0 0 +4 2023-12-27 15:47:43 0 0 0 +4 2023-12-27 15:47:48 0 0 0 +4 2023-12-27 15:47:53 0 0 0 +4 2023-12-27 15:47:58 0 0 0 +4 2023-12-27 15:48:03 0 0 0 +4 2023-12-27 15:48:08 0 0 0 +4 2023-12-27 15:48:13 0 0 0 +4 2023-12-27 15:48:18 0 0 0 +4 2023-12-27 15:48:23 0 0 0 +4 2023-12-27 15:48:28 0 0 0 +4 2023-12-27 15:48:33 0 0 0 +4 2023-12-27 15:48:38 0 0 0 +4 2023-12-27 15:48:43 0 0 0 +4 2023-12-27 15:48:48 0 0 0 +4 2023-12-27 15:48:53 0 0 0 +4 2023-12-27 15:48:58 0 0 0 +4 2023-12-27 15:49:03 0 0 0 +4 2023-12-27 15:49:08 0 0 0 +4 2023-12-27 15:49:13 0 0 0 +4 2023-12-27 15:49:18 0 0 0 +4 2023-12-27 15:49:23 0 0 0 +4 2023-12-27 15:49:28 0 0 0 +4 2023-12-27 15:49:33 0 0 0 +4 2023-12-27 15:49:38 0 0 0 +4 2023-12-27 15:49:43 0 0 0 +4 2023-12-27 15:49:48 0 0 0 +4 2023-12-27 15:49:53 0 0 0 +4 2023-12-27 15:49:58 0 0 0 +4 2023-12-27 15:50:03 0 0 0 +4 2023-12-27 15:50:08 0 0 0 +4 2023-12-27 15:50:13 0 0 0 +4 2023-12-27 15:50:18 0 0 0 +4 2023-12-27 15:50:23 0 0 0 +4 2023-12-27 15:50:28 0 0 0 +4 2023-12-27 15:50:33 0 0 0 +4 2023-12-27 15:50:38 0 0 0 +4 2023-12-27 15:50:43 0 0 0 +4 2023-12-27 15:50:48 0 0 0 +4 2023-12-27 15:50:53 0 0 0 +4 2023-12-27 15:50:58 0 0 0 +4 2023-12-27 15:51:03 0 0 0 +4 2023-12-27 15:51:08 0 0 0 +4 2023-12-27 15:51:13 0 0 0 +4 2023-12-27 15:51:18 0 0 0 +4 2023-12-27 15:51:23 0 0 0 +4 2023-12-27 15:51:28 0 0 0 +4 2023-12-27 15:51:33 0 0 0 +4 2023-12-27 15:51:38 0 0 0 +4 2023-12-27 15:51:43 0 0 0 +4 2023-12-27 15:51:48 0 0 0 +4 2023-12-27 15:51:53 0 0 0 +4 2023-12-27 15:51:58 0 0 0 +4 2023-12-27 15:52:03 0 0 0 +4 2023-12-27 15:52:08 0 0 0 +4 2023-12-27 15:52:13 0 0 0 +4 2023-12-27 15:52:18 0 0 0 +4 2023-12-27 15:52:23 0 0 0 +4 2023-12-27 15:52:28 0 0 0 +4 2023-12-27 15:52:33 0 0 0 +4 2023-12-27 15:52:38 0 0 0 +4 2023-12-27 15:52:43 0 0 0 +4 2023-12-27 15:52:48 0 0 0 +4 2023-12-27 15:52:53 0 0 0 +4 2023-12-27 15:52:58 0 0 0 +4 2023-12-27 15:53:03 0 0 0 +4 2023-12-27 15:53:08 0 0 0 +4 2023-12-27 
15:53:13 0 0 0 +4 2023-12-27 15:53:18 0 0 0 +4 2023-12-27 15:53:23 0 0 0 +4 2023-12-27 15:53:28 0 0 0
diff --git a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql
new file mode 100644
index 00000000000..d1019946e2a
--- /dev/null
+++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql
@@ -0,0 +1,32 @@
+DROP TABLE IF EXISTS ttl_group_by_bug;
+
+CREATE TABLE ttl_group_by_bug
+(key UInt32, ts DateTime, value UInt32, min_value UInt32 default value, max_value UInt32 default value)
+ENGINE = MergeTree() PARTITION BY toYYYYMM(ts)
+ORDER BY (key, toStartOfInterval(ts, toIntervalMinute(3)), ts)
+TTL ts + INTERVAL 5 MINUTE GROUP BY key, toStartOfInterval(ts, toIntervalMinute(3))
+SET value = sum(value), min_value = min(min_value), max_value = max(max_value), ts=min(toStartOfInterval(ts, toIntervalMinute(3)));
+
+INSERT INTO ttl_group_by_bug(key, ts, value) SELECT number%5 as key, now() - interval 10 minute + number, 0 FROM numbers(1000);
+
+SELECT *
+FROM
+(
+    SELECT
+        _part,
+        rowNumberInAllBlocks(),
+        (key, toStartOfInterval(ts, toIntervalMinute(3)), ts) AS cur,
+        lagInFrame((key, toStartOfInterval(ts, toIntervalMinute(3)), ts), 1) OVER () AS prev,
+        1
+    FROM ttl_group_by_bug
+)
+WHERE cur < prev
+LIMIT 2
+SETTINGS max_threads = 1;
+
+-- Just check that the sorting order is not broken.
+SELECT *
+FROM ttl_group_by_bug
+SETTINGS max_threads=1;
+
+DROP TABLE IF EXISTS ttl_group_by_bug;

From a3fa3784017b0512309771d1bb2ae870c65cc4b1 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Wed, 27 Dec 2023 17:02:52 +0100
Subject: [PATCH 33/88] Update TTLAggregationAlgorithm.cpp

---
 src/Processors/TTL/TTLAggregationAlgorithm.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Processors/TTL/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp
index 72cd973aa06..2537e21dd40 100644
--- a/src/Processors/TTL/TTLAggregationAlgorithm.cpp
+++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp
@@ -100,8 +100,8 @@ void TTLAggregationAlgorithm::execute(Block & block)
         }
     }
 
-    /// We are observing the row with new aggregation key.
-    /// In this case we definitely need to finish the current aggregation for the previuos key
+    /// We are observing a row with a new aggregation key.
+    /// In this case we definitely need to finish the current aggregation for the previous key and
     /// write results to `result_columns`.
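
Note on the fix above: the flush condition that patches 32-33 introduce in TTLAggregationAlgorithm::execute() can be summarized with a short sketch. This is simplified pseudocode rather than the exact ClickHouse source; the helper name needToFlushAggregationState is introduced here only for illustration, while same_as_current and ttl_expired stand for the per-row values computed earlier in execute().

    /// A pending aggregation state has to be flushed to result_columns when either:
    ///  1. the aggregation key changes (a new group starts), or
    ///  2. the key is unchanged but the current row's TTL is not expired,
    ///     so the row has to be written out as is instead of being aggregated.
    bool needToFlushAggregationState(bool same_as_current, bool ttl_expired)
    {
        const bool observing_new_key = !same_as_current;
        const bool no_new_rows_to_aggregate_within_the_same_key = same_as_current && !ttl_expired;
        return observing_new_key || no_new_rows_to_aggregate_within_the_same_key;
    }

Before the fix only the first condition triggered a flush, so a not-yet-expired row that followed an aggregated group with the same key could be emitted out of order, which is what the 02949_ttl_group_by_bug test above checks for.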
From 84727e986ea86bccca6ed19306e8eb2bdd41c03a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 27 Dec 2023 17:05:37 +0100 Subject: [PATCH 34/88] Add test for #57086 --- .../02954_analyzer_fuzz_i57086.reference | 2 ++ .../0_stateless/02954_analyzer_fuzz_i57086.sql | 15 +++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02954_analyzer_fuzz_i57086.reference create mode 100644 tests/queries/0_stateless/02954_analyzer_fuzz_i57086.sql diff --git a/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.reference b/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.reference new file mode 100644 index 00000000000..f2386499865 --- /dev/null +++ b/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.reference @@ -0,0 +1,2 @@ +limit w/ GROUP BY 0 0 +limit w/ GROUP BY 0 0 diff --git a/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.sql b/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.sql new file mode 100644 index 00000000000..a8029fdd3d6 --- /dev/null +++ b/tests/queries/0_stateless/02954_analyzer_fuzz_i57086.sql @@ -0,0 +1,15 @@ +--https://github.com/ClickHouse/ClickHouse/issues/57086 +SELECT + 'limit w/ GROUP BY', + count(NULL), + number +FROM remote('127.{1,2}', view( + SELECT intDiv(number, 2147483647) AS number + FROM numbers(10) + )) +GROUP BY number +WITH ROLLUP +ORDER BY + count() ASC, + number DESC NULLS LAST + SETTINGS limit = 2, allow_experimental_analyzer = 1; From 09599647e73131823be688f008117f26c04b6c8f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 18:55:53 +0100 Subject: [PATCH 35/88] Move some code from PODArray into implementation part Signed-off-by: Azat Khuzhin --- src/Common/PODArray.cpp | 39 +++++++++++++++ src/Common/PODArray.h | 106 +++++++++++++++++----------------------- 2 files changed, 84 insertions(+), 61 deletions(-) diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index d21dc40867d..dd1fed08cb5 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -1,8 +1,46 @@ +#include #include + namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_MPROTECT; + extern const int CANNOT_ALLOCATE_MEMORY; +} + +namespace PODArrayDetails +{ + +#ifndef NDEBUG +void protectMemoryRegion(void * addr, size_t len, int prot) +{ + if (0 != mprotect(addr, len, prot)) + throw ErrnoException(ErrorCodes::CANNOT_MPROTECT, "Cannot mprotect memory region"); +} +#endif + +size_t byte_size(size_t num_elements, size_t element_size) +{ + size_t amount; + if (__builtin_mul_overflow(num_elements, element_size, &amount)) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Amount of memory requested to allocate is more than allowed"); + return amount; +} + +size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right) +{ + size_t amount; + if (__builtin_add_overflow(byte_size(num_elements, element_size), pad_left + pad_right, &amount)) + throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Amount of memory requested to allocate is more than allowed"); + return amount; +} + +} + + /// Used for left padding of PODArray when empty const char empty_pod_array[empty_pod_array_size]{}; @@ -25,4 +63,5 @@ template class PODArray, 0, 0>; template class PODArray, 0, 0>; template class PODArray, 0, 0>; template class PODArray, 0, 0>; + } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 35353229303..6a048d1c6c0 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -1,25 +1,21 @@ #pragma once +#include 
+#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include - -#include -#include -#include -#include - #ifndef NDEBUG - #include +#include #endif -#include - /** Whether we can use memcpy instead of a loop with assignment to T from U. * It is Ok if types are the same. And if types are integral and of the same size, * example: char, signed char, unsigned char. @@ -33,12 +29,6 @@ constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v namespace DB { -namespace ErrorCodes -{ - extern const int CANNOT_MPROTECT; - extern const int CANNOT_ALLOCATE_MEMORY; -} - /** A dynamic array for POD types. * Designed for a small number of large arrays (rather than a lot of small ones). * To be more precise - for use in ColumnVector. @@ -75,6 +65,19 @@ namespace ErrorCodes static constexpr size_t empty_pod_array_size = 1024; extern const char empty_pod_array[empty_pod_array_size]; +namespace PODArrayDetails +{ + +void protectMemoryRegion(void * addr, size_t len, int prot); + +/// The amount of memory occupied by the num_elements of the elements. +size_t byte_size(size_t num_elements, size_t element_size); /// NOLINT + +/// Minimum amount of memory to allocate for num_elements, including padding. +size_t minimum_memory_for_elements(size_t num_elements, size_t element_size, size_t pad_left, size_t pad_right); /// NOLINT + +}; + /** Base class that depend only on size of element, not on element itself. * You can static_cast to this class if you want to insert some data regardless to the actual type T. */ @@ -100,27 +103,9 @@ protected: char * c_end = null; char * c_end_of_storage = null; /// Does not include pad_right. - /// The amount of memory occupied by the num_elements of the elements. - static size_t byte_size(size_t num_elements) /// NOLINT - { - size_t amount; - if (__builtin_mul_overflow(num_elements, ELEMENT_SIZE, &amount)) - throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Amount of memory requested to allocate is more than allowed"); - return amount; - } - - /// Minimum amount of memory to allocate for num_elements, including padding. - static size_t minimum_memory_for_elements(size_t num_elements) - { - size_t amount; - if (__builtin_add_overflow(byte_size(num_elements), pad_left + pad_right, &amount)) - throw Exception(ErrorCodes::CANNOT_ALLOCATE_MEMORY, "Amount of memory requested to allocate is more than allowed"); - return amount; - } - void alloc_for_num_elements(size_t num_elements) /// NOLINT { - alloc(minimum_memory_for_elements(num_elements)); + alloc(PODArrayDetails::minimum_memory_for_elements(num_elements, ELEMENT_SIZE, pad_left, pad_right)); } template @@ -186,7 +171,7 @@ protected: // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise, // memory issue such as corruption could appear in edge case. realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE), - minimum_memory_for_elements(1)), + PODArrayDetails::minimum_memory_for_elements(1, ELEMENT_SIZE, pad_left, pad_right)), std::forward(allocator_params)...); } else @@ -206,8 +191,7 @@ protected: if (right_rounded_down > left_rounded_up) { size_t length = right_rounded_down - left_rounded_up; - if (0 != mprotect(left_rounded_up, length, prot)) - throw ErrnoException(ErrorCodes::CANNOT_MPROTECT, "Cannot mprotect memory region"); + PODArrayDetails::protectMemoryRegion(left_rounded_up, length, prot); } } @@ -230,14 +214,14 @@ public: void reserve(size_t n, TAllocatorParams &&... 
allocator_params) { if (n > capacity()) - realloc(roundUpToPowerOfTwoOrZero(minimum_memory_for_elements(n)), std::forward(allocator_params)...); + realloc(roundUpToPowerOfTwoOrZero(PODArrayDetails::minimum_memory_for_elements(n, ELEMENT_SIZE, pad_left, pad_right)), std::forward(allocator_params)...); } template void reserve_exact(size_t n, TAllocatorParams &&... allocator_params) /// NOLINT { if (n > capacity()) - realloc(minimum_memory_for_elements(n), std::forward(allocator_params)...); + realloc(PODArrayDetails::minimum_memory_for_elements(n, ELEMENT_SIZE, pad_left, pad_right), std::forward(allocator_params)...); } template @@ -256,7 +240,7 @@ public: void resize_assume_reserved(const size_t n) /// NOLINT { - c_end = c_start + byte_size(n); + c_end = c_start + PODArrayDetails::byte_size(n, ELEMENT_SIZE); } const char * raw_data() const /// NOLINT @@ -337,7 +321,7 @@ public: explicit PODArray(size_t n) { this->alloc_for_num_elements(n); - this->c_end += this->byte_size(n); + this->c_end += PODArrayDetails::byte_size(n, sizeof(T)); } PODArray(size_t n, const T & x) @@ -409,9 +393,9 @@ public: if (n > old_size) { this->reserve(n); - memset(this->c_end, 0, this->byte_size(n - old_size)); + memset(this->c_end, 0, PODArrayDetails::byte_size(n - old_size, sizeof(T))); } - this->c_end = this->c_start + this->byte_size(n); + this->c_end = this->c_start + PODArrayDetails::byte_size(n, sizeof(T)); } void resize_fill(size_t n, const T & value) /// NOLINT @@ -422,7 +406,7 @@ public: this->reserve(n); std::fill(t_end(), t_end() + n - old_size, value); } - this->c_end = this->c_start + this->byte_size(n); + this->c_end = this->c_start + PODArrayDetails::byte_size(n, sizeof(T)); } template @@ -485,7 +469,7 @@ public: if (required_capacity > this->capacity()) this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward(allocator_params)...); - size_t bytes_to_copy = this->byte_size(from_end - from_begin); + size_t bytes_to_copy = PODArrayDetails::byte_size(from_end - from_begin, sizeof(T)); if (bytes_to_copy) { memcpy(this->c_end, reinterpret_cast(rhs.begin() + from_begin), bytes_to_copy); @@ -500,7 +484,7 @@ public: static_assert(pad_right_ >= PADDING_FOR_SIMD - 1); static_assert(sizeof(T) == sizeof(*from_begin)); insertPrepare(from_begin, from_end, std::forward(allocator_params)...); - size_t bytes_to_copy = this->byte_size(from_end - from_begin); + size_t bytes_to_copy = PODArrayDetails::byte_size(from_end - from_begin, sizeof(T)); memcpySmallAllowReadWriteOverflow15(this->c_end, reinterpret_cast(&*from_begin), bytes_to_copy); this->c_end += bytes_to_copy; } @@ -511,11 +495,11 @@ public: { static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); - size_t bytes_to_copy = this->byte_size(from_end - from_begin); + size_t bytes_to_copy = PODArrayDetails::byte_size(from_end - from_begin, sizeof(T)); if (!bytes_to_copy) return; - size_t bytes_to_move = this->byte_size(end() - it); + size_t bytes_to_move = PODArrayDetails::byte_size(end() - it, sizeof(T)); insertPrepare(from_begin, from_end); @@ -543,10 +527,10 @@ public: if (required_capacity > this->capacity()) this->reserve(roundUpToPowerOfTwoOrZero(required_capacity), std::forward(allocator_params)...); - size_t bytes_to_copy = this->byte_size(copy_size); + size_t bytes_to_copy = PODArrayDetails::byte_size(copy_size, sizeof(T)); if (bytes_to_copy) { - auto begin = this->c_start + this->byte_size(start_index); + auto begin = this->c_start + PODArrayDetails::byte_size(start_index, sizeof(T)); memcpy(this->c_end, 
reinterpret_cast(&*begin), bytes_to_copy); this->c_end += bytes_to_copy; } @@ -558,7 +542,7 @@ public: static_assert(memcpy_can_be_used_for_assignment, std::decay_t>); this->assertNotIntersects(from_begin, from_end); - size_t bytes_to_copy = this->byte_size(from_end - from_begin); + size_t bytes_to_copy = PODArrayDetails::byte_size(from_end - from_begin, sizeof(T)); if (bytes_to_copy) { memcpy(this->c_end, reinterpret_cast(&*from_begin), bytes_to_copy); @@ -591,13 +575,13 @@ public: /// arr1 takes ownership of the heap memory of arr2. arr1.c_start = arr2.c_start; arr1.c_end_of_storage = arr1.c_start + heap_allocated - arr2.pad_right - arr2.pad_left; - arr1.c_end = arr1.c_start + this->byte_size(heap_size); + arr1.c_end = arr1.c_start + PODArrayDetails::byte_size(heap_size, sizeof(T)); /// Allocate stack space for arr2. arr2.alloc(stack_allocated, std::forward(allocator_params)...); /// Copy the stack content. - memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size)); - arr2.c_end = arr2.c_start + this->byte_size(stack_size); + memcpy(arr2.c_start, stack_c_start, PODArrayDetails::byte_size(stack_size, sizeof(T))); + arr2.c_end = arr2.c_start + PODArrayDetails::byte_size(stack_size, sizeof(T)); }; auto do_move = [&](PODArray & src, PODArray & dest) @@ -606,8 +590,8 @@ public: { dest.dealloc(); dest.alloc(src.allocated_bytes(), std::forward(allocator_params)...); - memcpy(dest.c_start, src.c_start, this->byte_size(src.size())); - dest.c_end = dest.c_start + this->byte_size(src.size()); + memcpy(dest.c_start, src.c_start, PODArrayDetails::byte_size(src.size(), sizeof(T))); + dest.c_end = dest.c_start + PODArrayDetails::byte_size(src.size(), sizeof(T)); src.c_start = Base::null; src.c_end = Base::null; @@ -664,8 +648,8 @@ public: this->c_end_of_storage = this->c_start + rhs_allocated - Base::pad_right - Base::pad_left; rhs.c_end_of_storage = rhs.c_start + lhs_allocated - Base::pad_right - Base::pad_left; - this->c_end = this->c_start + this->byte_size(rhs_size); - rhs.c_end = rhs.c_start + this->byte_size(lhs_size); + this->c_end = this->c_start + PODArrayDetails::byte_size(rhs_size, sizeof(T)); + rhs.c_end = rhs.c_start + PODArrayDetails::byte_size(lhs_size, sizeof(T)); } else if (this->isAllocatedFromStack() && !rhs.isAllocatedFromStack()) { @@ -700,7 +684,7 @@ public: if (required_capacity > this->capacity()) this->reserve_exact(required_capacity, std::forward(allocator_params)...); - size_t bytes_to_copy = this->byte_size(required_capacity); + size_t bytes_to_copy = PODArrayDetails::byte_size(required_capacity, sizeof(T)); if (bytes_to_copy) memcpy(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); From cf1dae532130f9581ec1593e59b73e8f2365f85b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 19:25:49 +0100 Subject: [PATCH 36/88] Add missing includes into BitHelpers.h Signed-off-by: Azat Khuzhin --- src/Common/BitHelpers.h | 1 + src/IO/BitHelpers.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Common/BitHelpers.h b/src/Common/BitHelpers.h index 79c612d47e4..bb81d271140 100644 --- a/src/Common/BitHelpers.h +++ b/src/Common/BitHelpers.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index a9c7343f991..45c9b1ba572 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From cc296a1b8a550130a6fe4fc12e1e00bd56b798b6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 19:31:03 +0100 Subject: 
[PATCH 37/88] Move some code of ThreadPool into module part Signed-off-by: Azat Khuzhin --- src/Common/ThreadPool.cpp | 34 ++++++++++++++++++++++++++++++++++ src/Common/ThreadPool.h | 34 ++-------------------------------- 2 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 565affb0c65..3c2e6228421 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -28,6 +28,40 @@ namespace CurrentMetrics extern const Metric GlobalThreadScheduled; } +class JobWithPriority +{ +public: + using Job = std::function; + + Job job; + Priority priority; + CurrentMetrics::Increment metric_increment; + DB::OpenTelemetry::TracingContextOnThread thread_trace_context; + + /// Call stacks of all jobs' schedulings leading to this one + std::vector frame_pointers; + bool enable_job_stack_trace = false; + + JobWithPriority( + Job job_, Priority priority_, CurrentMetrics::Metric metric, + const DB::OpenTelemetry::TracingContextOnThread & thread_trace_context_, + bool capture_frame_pointers) + : job(job_), priority(priority_), metric_increment(metric), + thread_trace_context(thread_trace_context_), enable_job_stack_trace(capture_frame_pointers) + { + if (!capture_frame_pointers) + return; + /// Save all previous jobs call stacks and append with current + frame_pointers = DB::Exception::thread_frame_pointers; + frame_pointers.push_back(StackTrace().getFramePointers()); + } + + bool operator<(const JobWithPriority & rhs) const + { + return priority > rhs.priority; // Reversed for `priority_queue` max-heap to yield minimum value (i.e. highest priority) first + } +}; + static constexpr auto DEFAULT_THREAD_NAME = "ThreadPool"; template diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 3117509ab8f..31e4eabf63b 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -20,9 +20,10 @@ #include #include #include -#include #include +class JobWithPriority; + /** Very simple thread pool similar to boost::threadpool. * Advantages: * - catches exceptions and rethrows on wait. @@ -128,37 +129,6 @@ private: bool threads_remove_themselves = true; const bool shutdown_on_exception = true; - struct JobWithPriority - { - Job job; - Priority priority; - CurrentMetrics::Increment metric_increment; - DB::OpenTelemetry::TracingContextOnThread thread_trace_context; - - /// Call stacks of all jobs' schedulings leading to this one - std::vector frame_pointers; - bool enable_job_stack_trace = false; - - JobWithPriority( - Job job_, Priority priority_, CurrentMetrics::Metric metric, - const DB::OpenTelemetry::TracingContextOnThread & thread_trace_context_, - bool capture_frame_pointers) - : job(job_), priority(priority_), metric_increment(metric), - thread_trace_context(thread_trace_context_), enable_job_stack_trace(capture_frame_pointers) - { - if (!capture_frame_pointers) - return; - /// Save all previous jobs call stacks and append with current - frame_pointers = DB::Exception::thread_frame_pointers; - frame_pointers.push_back(StackTrace().getFramePointers()); - } - - bool operator<(const JobWithPriority & rhs) const - { - return priority > rhs.priority; // Reversed for `priority_queue` max-heap to yield minimum value (i.e. 
highest priority) first - } - }; - boost::heap::priority_queue jobs; std::list threads; std::exception_ptr first_exception; From f35f1537196aa95d47a177aa0e276144cd637f7b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 19:32:46 +0100 Subject: [PATCH 38/88] Add missing includes into ThreadStatus Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 101a56cd620..c99823b2dfa 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include From c6774cfe42207f3dfb1389ac6619b92987919975 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 19:33:44 +0100 Subject: [PATCH 39/88] Add missing includes for ZipArchiveWriter Signed-off-by: Azat Khuzhin --- src/IO/Archives/ZipArchiveWriter.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index af6c87e8c88..785a5005f87 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include namespace DB From 058001eb3145fc1d6aa3d177ee72dbe6a1a06ea6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 24 Dec 2023 19:37:12 +0100 Subject: [PATCH 40/88] Add missing includes into CompressionCodecDeflateQpl Signed-off-by: Azat Khuzhin --- src/Compression/CompressionCodecDeflateQpl.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Compression/CompressionCodecDeflateQpl.cpp b/src/Compression/CompressionCodecDeflateQpl.cpp index 25d809c9726..ee0356adde5 100644 --- a/src/Compression/CompressionCodecDeflateQpl.cpp +++ b/src/Compression/CompressionCodecDeflateQpl.cpp @@ -11,6 +11,7 @@ #include "libaccel_config.h" #include #include +#include #include From 217d24ab229938246d8b3f56a79a32fdfdb9c613 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 27 Dec 2023 11:27:16 +0100 Subject: [PATCH 41/88] Add missing includes for memcpySmall Signed-off-by: Azat Khuzhin --- src/Common/memcpySmall.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/memcpySmall.h b/src/Common/memcpySmall.h index 5eaa1203f05..0c2aee96250 100644 --- a/src/Common/memcpySmall.h +++ b/src/Common/memcpySmall.h @@ -1,6 +1,7 @@ #pragma once #include +#include /// ssize_t #ifdef __SSE2__ # include From fee2eadaf0f56e24d3b823999f7f81b21e90158a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Dec 2023 16:16:41 +0000 Subject: [PATCH 42/88] Fix parallel parsing for JSONCompactEachRow --- src/Formats/registerFormats.cpp | 2 ++ .../Impl/JSONCompactEachRowRowInputFormat.cpp | 28 ++++++++++++++++++- .../Impl/JSONCompactEachRowRowInputFormat.h | 1 + .../RowInputFormatWithNamesAndTypes.cpp | 2 +- 4 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 7c7ccac8b01..cc9cf380693 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -14,6 +14,7 @@ void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); void registerFileSegmentationEngineRegexp(FormatFactory & factory); void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory); +void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory); #if USE_HIVE void registerFileSegmentationEngineHiveText(FormatFactory & factory); 
#endif @@ -160,6 +161,7 @@ void registerFormats() registerFileSegmentationEngineJSONEachRow(factory); registerFileSegmentationEngineJSONAsString(factory); registerFileSegmentationEngineJSONAsObject(factory); + registerFileSegmentationEngineJSONCompactEachRow(factory); #if USE_HIVE registerFileSegmentationEngineHiveText(factory); #endif diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 08dc9e2d511..6fbd9d7ad22 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -60,12 +60,15 @@ void JSONCompactEachRowFormatReader::skipFieldDelimiter() void JSONCompactEachRowFormatReader::skipRowEndDelimiter() { + skipWhitespaceIfAny(*in); JSONUtils::skipArrayEnd(*in); +} +void JSONCompactEachRowFormatReader::skipRowBetweenDelimiter() +{ skipWhitespaceIfAny(*in); if (!in->eof() && (*in->position() == ',' || *in->position() == ';')) ++in->position(); - skipWhitespaceIfAny(*in); } @@ -91,6 +94,10 @@ void JSONCompactEachRowFormatReader::skipHeaderRow() bool JSONCompactEachRowFormatReader::checkForSuffix() { skipWhitespaceIfAny(*in); + /// Allow ',' and ';' after the last row. + if (!in->eof() && (*in->position() == ',' || *in->position() == ';')) + ++in->position(); + skipWhitespaceIfAny(*in); return in->eof(); } @@ -284,4 +291,23 @@ void registerJSONCompactEachRowSchemaReader(FormatFactory & factory) } } +void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory) +{ + auto register_func = [&](const String & format_name, bool with_names, bool with_types) + { + /// In case when we have names and/or types in the first two/one rows, + /// we need to read at least one more row of actual data. So, set + /// the minimum of rows for segmentation engine according to + /// parameters with_names and with_types. 
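// A minimal illustration of the bound computed just below (the helper name is
// hypothetical, not a ClickHouse symbol). For JSONCompactEachRowWithNamesAndTypes
// a stream begins with up to two header rows before the first data row, e.g.
//
//   ["id", "name"]        <- names row (present when with_names)
//   ["UInt32", "String"]  <- types row (present when with_types)
//   [1, "a"]              <- first data row
//
// A parallel-parsing unit holding only header rows would yield no data, hence:
#include <cstddef>

constexpr std::size_t minRowsForSegmentation(bool with_names, bool with_types)
{
    /// one data row plus one row per header row that is present
    return 1 + static_cast<std::size_t>(with_names) + static_cast<std::size_t>(with_types);
}

static_assert(minRowsForSegmentation(true, true) == 3);
static_assert(minRowsForSegmentation(false, false) == 1);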
+ size_t min_rows = 1 + int(with_names) + int(with_types); + factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + { + return JSONUtils::fileSegmentationEngineJSONCompactEachRow(in, memory, min_bytes, min_rows, max_rows); + }); + }; + + registerWithNamesAndTypes("JSONCompactEachRow", register_func); + registerWithNamesAndTypes("JSONCompactStringsEachRow", register_func); +} + } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 463d3c53a65..ebeb939e7fa 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -64,6 +64,7 @@ public: void skipRowStartDelimiter() override; void skipFieldDelimiter() override; void skipRowEndDelimiter() override; + void skipRowBetweenDelimiter() override; void skipRow() override; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index ffee2bea1cc..f7345848559 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -206,7 +206,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE updateDiagnosticInfo(); - if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types || is_header_detected)))) + if (likely(row_num != 1 || getCurrentUnitNumber() != 0 || (getCurrentUnitNumber() == 0 && (with_names || with_types || is_header_detected)))) format_reader->skipRowBetweenDelimiter(); format_reader->skipRowStartDelimiter(); From 64b4e1a66f768268c531a315e0633dae36e027f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 27 Dec 2023 17:42:51 +0100 Subject: [PATCH 43/88] Reintroduce compatibility with `is_deleted` on a syntax level --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9ed87e5c9ef..2dfe348e4ba 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -441,6 +441,15 @@ static StoragePtr create(const StorageFactory::Arguments & args) } else if (merging_params.mode == MergeTreeData::MergingParams::Replacing) { + /// Due to a misfortune, there could be an extra obsolete parameter. + /// We ignore it for backward compatibility. + if (arg_cnt - arg_num == 2 && !engine_args[arg_cnt - 1]->as<ASTLiteral>() && is_extended_storage_def) + { + if (!tryGetIdentifierName(engine_args[arg_cnt - 1])) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "is_deleted column name must be an identifier {}", verbose_help_message); + --arg_cnt; + } + /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column.
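// A hedged sketch of the trailing-argument rule added above (`EngineArg` and the
// helper name are illustrative stand-ins for the AST checks in the patch). The
// shapes it tolerates for ReplacingMergeTree with an extended storage definition:
//
//   ENGINE = ReplacingMergeTree                        -- no version column
//   ENGINE = ReplacingMergeTree(version)               -- version column only
//   ENGINE = ReplacingMergeTree(version, is_deleted)   -- obsolete trailing column, parsed and dropped
//
#include <stdexcept>

struct EngineArg { bool is_literal = false; bool is_identifier = false; };

/// Returns how many trailing arguments to discard as the obsolete `is_deleted` column.
inline int obsoleteTrailingArgCount(const EngineArg & last, int remaining_args, bool is_extended_storage_def)
{
    if (remaining_args == 2 && is_extended_storage_def && !last.is_literal)
    {
        if (!last.is_identifier)
            throw std::invalid_argument("is_deleted column name must be an identifier");
        return 1; /// ignored for backward compatibility
    }
    return 0;
}
// The check below then interprets a remaining trailing non-literal as the version column.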
if (arg_cnt && !engine_args[arg_cnt - 1]->as<ASTLiteral>()) { From 5f183649b2e54e3e20bfab8bcbce637aa282ab90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 27 Dec 2023 17:44:46 +0100 Subject: [PATCH 44/88] Avoid throwing ABORTED on normal situations --- src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index a5f503718b6..cd434586d65 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -186,7 +186,8 @@ void MergeTreeBackgroundExecutor<Queue>::removeTasksCorrespondingToStorage(Stora try { /// An exception context is needed to properly delete write buffers without finalization - throw Exception(ErrorCodes::ABORTED, "Storage is about to be deleted. Done pending task as if it was aborted."); + /// See WriteBuffer::~WriteBuffer for more context + throw std::runtime_error(""); } catch (...)
{ From dfe7b0e9738b244bd296487c291cb353d570bfc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 27 Dec 2023 18:13:22 +0100 Subject: [PATCH 45/88] Keep message --- src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index cd434586d65..a3f8e02f5eb 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -187,7 +187,7 @@ void MergeTreeBackgroundExecutor<Queue>::removeTasksCorrespondingToStorage(Stora { /// An exception context is needed to properly delete write buffers without finalization /// See WriteBuffer::~WriteBuffer for more context - throw std::runtime_error(""); + throw std::runtime_error("Storage is about to be deleted. Done pending task as if it was aborted."); } catch (...) { From d7a6148828d92d6b34bc3ab076d6f517c47cff22 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 18:13:51 +0100 Subject: [PATCH 46/88] partially revert #54706 (135df6870c65d896fb015cbc1963a4972f76a93d) --- .../Merges/Algorithms/ReplacingSortedAlgorithm.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 0c0598171b3..4d2443b1e46 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -28,6 +28,7 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( , cleanup(cleanup_) , cleanedup_rows_count(cleanedup_rows_count_) { + if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); if (!version_column.empty()) @@ -82,11 +83,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; if (!cleanup || !value) insertRow(); - else if (cleanup && cleanedup_rows_count != nullptr) - { + else if (cleanedup_rows_count != nullptr) *cleanedup_rows_count += current_row_sources.size(); - current_row_sources.resize(0); - } } else insertRow(); @@ -143,11 +141,8 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; if (!cleanup || !value) insertRow(); - else if (cleanup && cleanedup_rows_count != nullptr) - { + else if (cleanedup_rows_count != nullptr) *cleanedup_rows_count += current_row_sources.size(); - current_row_sources.resize(0); - } } else insertRow(); From f9248483470ecd87fe7413beb6eb477910eb51d4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 18:17:59 +0100 Subject: [PATCH 47/88] partially revert #54368 (f28ad1e13663abfc94999ea32fc5830f1734495e) --- .../Algorithms/ReplacingSortedAlgorithm.cpp | 24 ++++++------------- .../Algorithms/ReplacingSortedAlgorithm.h | 4 +--- .../Merges/ReplacingSortedTransform.h | 6 ++--- src/Storages/MergeTree/MergeTask.cpp | 14 +++++------ src/Storages/MergeTree/MergeTask.h | 1 - 5 files changed, 16 insertions(+), 33 deletions(-) diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 4d2443b1e46..db770de858c 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -21,14 +21,10 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes, - bool cleanup_, - size_t * cleanedup_rows_count_) + bool cleanup_) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes) - , cleanup(cleanup_) - , cleanedup_rows_count(cleanedup_rows_count_) + , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) { - if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); if (!version_column.empty()) @@ -78,13 +74,10 @@ IMergingAlgorithm::Status
ReplacingSortedAlgorithm::merge() /// Write the data for the previous primary key. if (!selected_row.empty()) { - if (is_deleted_column_number != -1) + if (is_deleted_column_number!=-1) { - uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; - if (!cleanup || !value) + if (!(cleanup && assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) insertRow(); - else if (cleanedup_rows_count != nullptr) - *cleanedup_rows_count += current_row_sources.size(); } else insertRow(); @@ -98,7 +91,7 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() if (out_row_sources_buf) current_row_sources.emplace_back(current.impl->order, true); - if (is_deleted_column_number != -1) + if ((is_deleted_column_number!=-1)) { const UInt8 is_deleted = assert_cast<const ColumnUInt8 &>(*current->all_columns[is_deleted_column_number]).getData()[current->getRow()]; if ((is_deleted != 1) && (is_deleted != 0)) @@ -136,13 +129,10 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge() /// We will write the data for the last primary key. if (!selected_row.empty()) { - if (is_deleted_column_number != -1) + if (is_deleted_column_number!=-1) { - uint8_t value = assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]; - if (!cleanup || !value) + if (!(cleanup && assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num])) insertRow(); - else if (cleanedup_rows_count != nullptr) - *cleanedup_rows_count += current_row_sources.size(); } else insertRow(); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index b2258918fde..d57bab4708c 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -27,8 +27,7 @@ public: size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false, - bool cleanup = false, - size_t * cleanedup_rows_count = nullptr); + bool cleanup = false); const char * getName() const override { return "ReplacingSortedAlgorithm"; } Status merge() override; @@ -39,7 +38,6 @@ private: ssize_t is_deleted_column_number = -1; ssize_t version_column_number = -1; bool cleanup = false; - size_t * cleanedup_rows_count = nullptr; using RowRef = detail::RowRefWithOwnedChunk; static constexpr size_t max_row_refs = 2; /// last, current.
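// A condensed restatement (simplified standalone signature; the helper name is
// illustrative) of the keep/drop rule this revert leaves in merge(): without the
// separate cleaned-up counter, the decision depends only on `cleanup` and the
// row's is_deleted flag.
#include <cstdint>

/// True when the winning row of a primary-key group should be written out.
inline bool shouldInsertRow(bool has_is_deleted_column, bool cleanup, uint8_t is_deleted)
{
    if (!has_is_deleted_column)
        return true;                      /// no deletion marker -> always keep
    return !(cleanup && is_deleted != 0); /// drop deleted rows only on cleanup merges
}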
diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 7e293db1aa8..9cd2f29a862 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -19,8 +19,7 @@ public: size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false, - bool cleanup = false, - size_t * cleanedup_rows_count = nullptr) + bool cleanup = false) : IMergingTransform( num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, @@ -32,8 +31,7 @@ public: max_block_size_bytes, out_row_sources_buf_, use_average_block_sizes, - cleanup, - cleanedup_rows_count) + cleanup) { } diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 2269e4369d8..6401427964b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -496,7 +496,6 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read; size_t input_rows_filtered = *global_ctx->input_rows_filtered; - size_t cleanedup_rows_count = global_ctx->cleanedup_rows_count; global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size(); global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed); @@ -509,13 +508,12 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// In special case, when there is only one source part, and no rows were skipped, we may have /// skipped writing rows_sources file. Otherwise rows_sources_count must be equal to the total /// number of input rows. - if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) - && sum_input_rows_exact != rows_sources_count + input_rows_filtered + cleanedup_rows_count) + if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) && sum_input_rows_exact != rows_sources_count + input_rows_filtered) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) and cleaned up rows ({}) differs from number " - "of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, cleanedup_rows_count, rows_sources_count); + ErrorCodes::LOGICAL_ERROR, + "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " + "of bytes written to rows_sources file ({}). It is a bug.", + sum_input_rows_exact, input_rows_filtered, rows_sources_count); /// TemporaryDataOnDisk::createRawStream returns WriteBufferFromFile implementing IReadableWriteBuffer /// and we expect to get ReadBufferFromFile here. 
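// A sketch (simplified, standalone; the function name is illustrative) of the
// accounting invariant restored in the vertical-merge check above: once the
// cleaned-up counter is gone, every row read from the source parts must be
// either recorded in the rows_sources file or counted as filtered.
#include <cstddef>
#include <stdexcept>

inline void checkRowsAccounting(std::size_t sum_input_rows_exact, std::size_t input_rows_filtered,
                                std::size_t rows_sources_count, std::size_t source_parts_count)
{
    /// With a single source part and no skipped rows, the rows_sources file may be absent.
    const bool file_expected = rows_sources_count > 0 || source_parts_count > 1;
    if (file_expected && sum_input_rows_exact != rows_sources_count + input_rows_filtered)
        throw std::logic_error("rows_sources accounting mismatch"); /// mirrors the LOGICAL_ERROR above
}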
@@ -1010,7 +1008,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() merged_transform = std::make_shared<ReplacingSortedTransform>( header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column, merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size, - (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup, &global_ctx->cleanedup_rows_count); + (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup); break; case MergeTreeData::MergingParams::Graphite: diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 8a96ceb8c40..b2a5796737d 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -161,7 +161,6 @@ private: bool deduplicate{false}; Names deduplicate_by_columns{}; bool cleanup{false}; - size_t cleanedup_rows_count{0}; NamesAndTypesList gathering_columns{}; NamesAndTypesList merging_columns{}; From 041c5d421991ab16040c1b88ce3a3269f5ae1f29 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 27 Dec 2023 18:28:34 +0100 Subject: [PATCH 48/88] Make caches with the same path use the same cache objects --- src/Interpreters/Cache/FileCacheFactory.cpp | 61 ++++++++-- src/Interpreters/Cache/IFileCachePriority.cpp | 2 +- .../config.d/storage_conf_2.xml | 24 ++++ .../integration/test_filesystem_cache/test.py | 107 ++++++++++++++++++ 4 files changed, 186 insertions(+), 8 deletions(-) create mode 100644 tests/integration/test_filesystem_cache/config.d/storage_conf_2.xml diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index 84eafde9afd..3e857d8a8e3 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -50,12 +50,35 @@ FileCachePtr FileCacheFactory::getOrCreate( { std::lock_guard lock(mutex); - auto it = caches_by_name.find(cache_name); + auto it = std::find_if(caches_by_name.begin(), caches_by_name.end(), [&](const auto & cache_by_name) + { + return cache_by_name.second->getSettings().base_path == file_cache_settings.base_path; + }); + if (it == caches_by_name.end()) { auto cache = std::make_shared<FileCache>(cache_name, file_cache_settings); - it = caches_by_name.emplace( - cache_name, std::make_unique<FileCacheData>(cache, file_cache_settings, config_path)).first; + + bool inserted; + std::tie(it, inserted) = caches_by_name.emplace( + cache_name, std::make_unique<FileCacheData>(cache, file_cache_settings, config_path)); + + if (!inserted) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cache with name {} exists, but it has a different path", cache_name); + } + } + else if (it->second->getSettings() != file_cache_settings) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Found more than one cache configuration with the same path, " + "but with different cache settings ({} and {})", + it->first, cache_name); + } + else if (it->first != cache_name) + { + caches_by_name.emplace(cache_name, it->second); } return it->second->cache; @@ -69,12 +92,33 @@ FileCachePtr FileCacheFactory::create( std::lock_guard lock(mutex); auto it = caches_by_name.find(cache_name); + if (it != caches_by_name.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cache with name {} already exists", cache_name); - auto cache = std::make_shared<FileCache>(cache_name, file_cache_settings); - it = caches_by_name.emplace( - cache_name, std::make_unique<FileCacheData>(cache, file_cache_settings, config_path)).first; + it =
std::find_if(caches_by_name.begin(), caches_by_name.end(), [&](const auto & cache_by_name) + { + return cache_by_name.second->getSettings().base_path == file_cache_settings.base_path; + }); + + if (it == caches_by_name.end()) + { + auto cache = std::make_shared<FileCache>(cache_name, file_cache_settings); + it = caches_by_name.emplace( + cache_name, std::make_unique<FileCacheData>(cache, file_cache_settings, config_path)).first; + } + else if (it->second->getSettings() != file_cache_settings) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Found more than one cache configuration with the same path, " + "but with different cache settings ({} and {})", + it->first, cache_name); + } + else + { + [[maybe_unused]] bool inserted = caches_by_name.emplace(cache_name, it->second).second; + chassert(inserted); + } return it->second->cache; } @@ -98,11 +142,14 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig caches_by_name_copy = caches_by_name; } + std::unordered_set<std::string> checked_paths; for (const auto & [_, cache_info] : caches_by_name_copy) { - if (cache_info->config_path.empty()) + if (cache_info->config_path.empty() || checked_paths.contains(cache_info->config_path)) continue; + checked_paths.emplace(cache_info->config_path); + FileCacheSettings new_settings; new_settings.loadFromConfig(config, cache_info->config_path); diff --git a/src/Interpreters/Cache/IFileCachePriority.cpp b/src/Interpreters/Cache/IFileCachePriority.cpp index 9109e76562f..eb396a1e323 100644 --- a/src/Interpreters/Cache/IFileCachePriority.cpp +++ b/src/Interpreters/Cache/IFileCachePriority.cpp @@ -13,7 +13,7 @@ namespace DB IFileCachePriority::IFileCachePriority(size_t max_size_, size_t max_elements_) : max_size(max_size_), max_elements(max_elements_) { - CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); + CurrentMetrics::add(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); } IFileCachePriority::Entry::Entry( diff --git a/tests/integration/test_filesystem_cache/config.d/storage_conf_2.xml b/tests/integration/test_filesystem_cache/config.d/storage_conf_2.xml new file mode 100644 index 00000000000..a068d7b954c --- /dev/null +++ b/tests/integration/test_filesystem_cache/config.d/storage_conf_2.xml @@ -0,0 +1,24 @@ +<clickhouse> + <storage_configuration> + <disks> + <hdd_blob> + <type>local_blob_storage</type> + <path>/</path> + </hdd_blob> + <cache1> + <type>cache</type> + <disk>hdd_blob</disk> + <path>/cache1/</path> + <max_size>1Mi</max_size> + <cache_on_write_operations>1</cache_on_write_operations> + </cache1> + <cache2> + <type>cache</type> + <disk>hdd_blob</disk> + <path>/cache1/</path> + <max_size>1Mi</max_size> + <cache_on_write_operations>1</cache_on_write_operations> + </cache2> + </disks> + </storage_configuration> +</clickhouse> diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 3a6a1ef76eb..ab1bc4e4344 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -21,6 +21,12 @@ def cluster(): ], stay_alive=True, ) + cluster.add_instance( + "node_caches_with_same_path", + main_configs=[ + "config.d/storage_conf_2.xml", + ], + ) logging.info("Starting cluster...") cluster.start() @@ -87,3 +93,104 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ) node.query("SELECT * FROM test FORMAT Null") assert count == int(node.query("SELECT count() FROM test")) + + +@pytest.mark.parametrize("node_name", ["node"]) +def test_caches_with_the_same_configuration(cluster, node_name): + node = cluster.instances[node_name] + cache_path = "cache1" + + node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + for table in ["test", "test2"]: + node.query( + f""" + DROP TABLE IF EXISTS {table} SYNC; + + CREATE TABLE {table} (key UInt32, value String) + Engine=MergeTree() + ORDER BY value + SETTINGS disk = disk( + type = cache, + name = {table}, + path = '{cache_path}', + disk
= 'hdd_blob', + max_file_segment_size = '1Ki', + boundary_alignment = '1Ki', + cache_on_write_operations=1, + max_size = '1Mi'); + + SET enable_filesystem_cache_on_write_operations=1; + INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') + LIMIT 1000; + """ + ) + + size = int( + node.query( + "SELECT value FROM system.metrics WHERE name = 'FilesystemCacheSize'" + ) + ) + assert ( + node.query( + "SELECT cache_name, sum(size) FROM system.filesystem_cache GROUP BY cache_name ORDER BY cache_name" + ).strip() + == f"test\t{size}\ntest2\t{size}" + ) + + table = "test3" + assert ( + "Found more than one cache configuration with the same path, but with different cache settings" + in node.query_and_get_error( + f""" + DROP TABLE IF EXISTS {table} SYNC; + + CREATE TABLE {table} (key UInt32, value String) + Engine=MergeTree() + ORDER BY value + SETTINGS disk = disk( + type = cache, + name = {table}, + path = '{cache_path}', + disk = 'hdd_blob', + max_file_segment_size = '1Ki', + boundary_alignment = '1Ki', + cache_on_write_operations=0, + max_size = '2Mi'); + """ + ) + ) + + +@pytest.mark.parametrize("node_name", ["node_caches_with_same_path"]) +def test_caches_with_the_same_configuration_2(cluster, node_name): + node = cluster.instances[node_name] + cache_path = "cache1" + + node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + for table in ["cache1", "cache2"]: + node.query( + f""" + DROP TABLE IF EXISTS {table} SYNC; + + CREATE TABLE {table} (key UInt32, value String) + Engine=MergeTree() + ORDER BY value + SETTINGS disk = '{table}'; + + SET enable_filesystem_cache_on_write_operations=1; + INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') + LIMIT 1000; + """ + ) + + size = int( + node.query( + "SELECT value FROM system.metrics WHERE name = 'FilesystemCacheSize'" + ) + ) + assert ( + node.query( + "SELECT cache_name, sum(size) FROM system.filesystem_cache GROUP BY cache_name ORDER BY cache_name" + ).strip() + == f"cache1\t{size}\ncache2\t{size}" + ) From bcd34b25b2e9bacbec1025bc7b874247b29779af Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Dec 2023 17:42:40 +0000 Subject: [PATCH 49/88] Remove mayBenefitFromIndexForIn --- src/Storages/Hive/StorageHive.h | 10 ----- src/Storages/IStorage.h | 7 --- src/Storages/MergeTree/MergeTreeData.cpp | 45 ------------------- src/Storages/MergeTree/MergeTreeData.h | 2 - .../MergeTree/StorageFromMergeTreeDataPart.h | 8 ---- src/Storages/RocksDB/StorageEmbeddedRocksDB.h | 6 --- src/Storages/StorageBuffer.cpp | 9 ---- src/Storages/StorageBuffer.h | 3 -- src/Storages/StorageKeeperMap.h | 6 --- src/Storages/StorageMaterializedView.h | 7 --- src/Storages/StorageMerge.cpp | 22 --------- src/Storages/StorageMerge.h | 4 -- src/Storages/StorageMergeTree.h | 2 - src/Storages/StorageProxy.h | 6 --- src/Storages/StorageReplicatedMergeTree.h | 2 - src/Storages/System/StorageSystemNumbers.h | 7 --- .../System/StorageSystemZooKeeper.cpp | 5 --- src/Storages/System/StorageSystemZooKeeper.h | 2 - 18 files changed, 153 deletions(-) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index a3c47d400e2..8b378bf9e54 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -40,18 +40,8 @@ public: String getName() const override { return "Hive"; } - bool supportsIndexForIn() const override { return true; } - bool supportsSubcolumns() const override { return true; } - bool mayBenefitFromIndexForIn( - const ASTPtr & /* left_in_operand */, - ContextPtr /* query_context */, - const 
StorageMetadataPtr & /* metadata_snapshot */) const override - { - return true; - } - Pipe read( const Names & column_names, const StorageSnapshotPtr & storage_snapshot, diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 28f9ec6677a..ac30b293d4a 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -598,13 +598,6 @@ public: std::atomic is_detached{false}; std::atomic is_being_restarted{false}; - /// Does table support index for IN sections - virtual bool supportsIndexForIn() const { return false; } - - /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. - virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; } - - /** A list of tasks to check a validity of data. * Each IStorage implementation may interpret this task in its own way. * E.g. for some storages it's a list of files in filesystem, for others it can be a list of parts. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0ddeb0a6828..b25bef71bf7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6551,51 +6551,6 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions( return false; } -bool MergeTreeData::mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const -{ - /// Make sure that the left side of the IN operator contain part of the key. - /// If there is a tuple on the left side of the IN operator, at least one item of the tuple - /// must be part of the key (probably wrapped by a chain of some acceptable functions). - const auto * left_in_operand_tuple = left_in_operand->as(); - const auto & index_factory = MergeTreeIndexFactory::instance(); - const auto & query_settings = query_context->getSettingsRef(); - - auto check_for_one_argument = [&](const auto & ast) - { - if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(ast, metadata_snapshot)) - return true; - - if (query_settings.use_skip_indexes) - { - for (const auto & index : metadata_snapshot->getSecondaryIndices()) - if (index_factory.get(index)->mayBenefitFromIndexForIn(ast)) - return true; - } - - if (query_settings.optimize_use_projections) - { - for (const auto & projection : metadata_snapshot->getProjections()) - if (projection.isPrimaryKeyColumnPossiblyWrappedInFunctions(ast)) - return true; - } - - return false; - }; - - if (left_in_operand_tuple && left_in_operand_tuple->name == "tuple") - { - for (const auto & item : left_in_operand_tuple->arguments->children) - if (check_for_one_argument(item)) - return true; - - /// The tuple itself may be part of the primary key - /// or skip index, so check that as a last resort. 
- } - - return check_for_one_argument(left_in_operand); -} - using PartitionIdToMaxBlock = std::unordered_map; Block MergeTreeData::getMinMaxCountProjectionBlock( diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c69c7aaba3d..ab3a641e37a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -443,8 +443,6 @@ public: NamesAndTypesList getVirtuals() const override; - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; - /// Snapshot for MergeTree contains the current set of data parts /// at the moment of the start of query. struct SnapshotData : public StorageSnapshot::Data diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 96b211085c1..6606e4d738e 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -86,18 +86,10 @@ public: bool supportsPrewhere() const override { return true; } - bool supportsIndexForIn() const override { return true; } - bool supportsDynamicSubcolumns() const override { return true; } bool supportsSubcolumns() const override { return true; } - bool mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override - { - return storage.mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); - } - NamesAndTypesList getVirtuals() const override { return storage.getVirtuals(); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 733baebb601..f2112641234 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -68,12 +68,6 @@ public: ContextPtr context) override; bool supportsParallelInsert() const override { return true; } - bool supportsIndexForIn() const override { return true; } - bool mayBenefitFromIndexForIn( - const ASTPtr & node, ContextPtr /*query_context*/, const StorageMetadataPtr & /*metadata_snapshot*/) const override - { - return node->getColumnName() == primary_key; - } bool storesDataOnDisk() const override { return true; } Strings getDataPaths() const override { return {rocksdb_dir}; } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 2646a7753e4..943bf0eb801 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -665,15 +665,6 @@ SinkToStoragePtr StorageBuffer::write(const ASTPtr & /*query*/, const StorageMet } -bool StorageBuffer::mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const -{ - if (auto destination = getDestinationTable()) - return destination->mayBenefitFromIndexForIn(left_in_operand, query_context, destination->getInMemoryMetadataPtr()); - return false; -} - - void StorageBuffer::startup() { if (getContext()->getSettingsRef().readonly) diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 21eb86019fc..2610cf79989 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -105,9 +105,6 @@ public: bool supportsSampling() const override { return true; } bool supportsPrewhere() const override; bool supportsFinal() const override { return true; } - bool supportsIndexForIn() const override { return true; } - - bool 
mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override; diff --git a/src/Storages/StorageKeeperMap.h b/src/Storages/StorageKeeperMap.h index 10eebdd0129..aa9687243d8 100644 --- a/src/Storages/StorageKeeperMap.h +++ b/src/Storages/StorageKeeperMap.h @@ -67,12 +67,6 @@ public: void mutate(const MutationCommands & commands, ContextPtr context) override; bool supportsParallelInsert() const override { return true; } - bool supportsIndexForIn() const override { return true; } - bool mayBenefitFromIndexForIn( - const ASTPtr & node, ContextPtr /*query_context*/, const StorageMetadataPtr & /*metadata_snapshot*/) const override - { - return node->getColumnName() == primary_key; - } bool supportsDelete() const override { return true; } void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 9ddcc458f3e..f37abdfb1a3 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -29,16 +29,9 @@ public: bool supportsSampling() const override { return getTargetTable()->supportsSampling(); } bool supportsPrewhere() const override { return getTargetTable()->supportsPrewhere(); } bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } - bool supportsIndexForIn() const override { return getTargetTable()->supportsIndexForIn(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } bool supportsSubcolumns() const override { return getTargetTable()->supportsSubcolumns(); } bool supportsTransactions() const override { return getTargetTable()->supportsTransactions(); } - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /* metadata_snapshot */) const override - { - auto target_table = getTargetTable(); - auto metadata_snapshot = target_table->getInMemoryMetadataPtr(); - return target_table->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); - } SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr context, bool async_insert) override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 36c92129177..868dbc4b231 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -236,28 +236,6 @@ std::optional StorageMerge::supportedPrewhereColumns() const return supported_columns; } -bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const -{ - /// It's beneficial if it is true for at least one table. - StorageListWithLocks selected_tables = getSelectedTables(query_context); - - size_t i = 0; - for (const auto & table : selected_tables) - { - const auto & storage_ptr = std::get<1>(table); - auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - if (storage_ptr->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot)) - return true; - - ++i; - /// For simplicity reasons, check only first ten tables. 
- if (i > 10) - break; - } - - return false; -} - QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage( ContextPtr local_context, diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 2455eb678bb..97e453facdf 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -45,7 +45,6 @@ public: /// The check is delayed to the read method. It checks the support of the tables used. bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - bool supportsIndexForIn() const override { return true; } bool supportsSubcolumns() const override { return true; } bool supportsPrewhere() const override { return true; } std::optional supportedPrewhereColumns() const override; @@ -71,9 +70,6 @@ public: /// the structure of sub-tables is not checked void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; - bool mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override; - /// Evaluate database name or regexp for StorageMerge and TableFunction merge static std::tuple evaluateDatabaseName(const ASTPtr & node, ContextPtr context); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 89de60ed819..f4dc52659b1 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -53,8 +53,6 @@ public: bool supportsParallelInsert() const override { return true; } - bool supportsIndexForIn() const override { return true; } - bool supportsTransactions() const override { return true; } void read( diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 17f1b2a6d97..8fbc1313528 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -142,12 +142,6 @@ public: ActionLock getActionLock(StorageActionBlockType action_type) override { return getNested()->getActionLock(action_type); } - bool supportsIndexForIn() const override { return getNested()->supportsIndexForIn(); } - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override - { - return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); - } - DataValidationTasksPtr getCheckTaskList(const CheckTaskFilter & check_task_filter, ContextPtr context) override { return getNested()->getCheckTaskList(check_task_filter, context); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 94ddaa753a5..bb2cc04411a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -199,8 +199,6 @@ public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; - bool supportsIndexForIn() const override { return true; } - void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override; ActionLock getActionLock(StorageActionBlockType action_type) override; diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 4c98a359ed5..fe6227db406 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -60,13 +60,6 @@ public: bool isSystemStorage() const override { return true; } bool supportsTransactions() const override { return true; } - bool supportsIndexForIn() const override { return true; } 
- - bool mayBenefitFromIndexForIn( - const ASTPtr & left_in_operand, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const override - { - return left_in_operand->as() && left_in_operand->getColumnName() == "number"; - } private: friend class ReadFromSystemNumbersStep; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index bd7c81df107..ba069380855 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -212,11 +212,6 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } -bool StorageSystemZooKeeper::mayBenefitFromIndexForIn(const ASTPtr & node, ContextPtr, const StorageMetadataPtr &) const -{ - return node->as() && node->getColumnName() == "path"; -} - void StorageSystemZooKeeper::read( QueryPlan & query_plan, const Names & /*column_names*/, diff --git a/src/Storages/System/StorageSystemZooKeeper.h b/src/Storages/System/StorageSystemZooKeeper.h index a016d3ad74c..7f7aba862a2 100644 --- a/src/Storages/System/StorageSystemZooKeeper.h +++ b/src/Storages/System/StorageSystemZooKeeper.h @@ -33,8 +33,6 @@ public: size_t /*num_streams*/) override; bool isSystemStorage() const override { return true; } - bool supportsIndexForIn() const override { return true; } - bool mayBenefitFromIndexForIn(const ASTPtr & node, ContextPtr, const StorageMetadataPtr &) const override; }; } From e493789bf346d4521391de925f39b9da0d929866 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 27 Dec 2023 17:51:23 +0000 Subject: [PATCH 50/88] Remove from indexes as well. --- src/Storages/MergeTree/MergeTreeIndexAnnoy.h | 2 -- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 27 ------------------- .../MergeTree/MergeTreeIndexBloomFilter.h | 2 -- .../MergeTree/MergeTreeIndexFullText.cpp | 5 ---- .../MergeTree/MergeTreeIndexFullText.h | 2 -- .../MergeTree/MergeTreeIndexHypothesis.cpp | 5 ---- .../MergeTree/MergeTreeIndexHypothesis.h | 2 -- .../MergeTree/MergeTreeIndexInverted.cpp | 5 ---- .../MergeTree/MergeTreeIndexInverted.h | 2 -- .../MergeTree/MergeTreeIndexMinMax.cpp | 15 ----------- src/Storages/MergeTree/MergeTreeIndexMinMax.h | 2 -- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 5 ---- src/Storages/MergeTree/MergeTreeIndexSet.h | 2 -- .../MergeTree/MergeTreeIndexUSearch.h | 2 -- src/Storages/MergeTree/MergeTreeIndices.h | 3 --- 15 files changed, 81 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h index 1e753fd9bc4..dead12fe66f 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.h +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.h @@ -100,8 +100,6 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & /*node*/) const override { return false; } - private: const UInt64 trees; const String distance_function; diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index ea3f4eac090..fa05f9e61e1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -38,33 +38,6 @@ MergeTreeIndexGranulePtr MergeTreeIndexBloomFilter::createIndexGranule() const return 
std::make_shared(bits_per_row, hash_functions, index.column_names.size()); } -bool MergeTreeIndexBloomFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const -{ - Names required_columns = index.expression->getRequiredColumns(); - NameSet required_columns_set(required_columns.begin(), required_columns.end()); - - std::vector nodes_to_check; - nodes_to_check.emplace_back(node); - - while (!nodes_to_check.empty()) - { - auto node_to_check = nodes_to_check.back(); - nodes_to_check.pop_back(); - - const auto & column_name = node_to_check->getColumnName(); - if (required_columns_set.find(column_name) != required_columns_set.end()) - return true; - - if (const auto * function = typeid_cast(node_to_check.get())) - { - auto & function_arguments_children = function->arguments->children; - nodes_to_check.insert(nodes_to_check.end(), function_arguments_children.begin(), function_arguments_children.end()); - } - } - - return false; -} - MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilter::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const { return std::make_shared(bits_per_row, hash_functions, index.column_names); diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h index f60630c8056..4d688ae3cfc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.h @@ -22,8 +22,6 @@ public: MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query_info, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - private: size_t bits_per_row; size_t hash_functions; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index cf8cd1ffba4..6c1fff53109 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -696,11 +696,6 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); } -bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const -{ - return std::find(std::cbegin(index.column_names), std::cend(index.column_names), node->getColumnName()) != std::cend(index.column_names); -} - MergeTreeIndexPtr bloomFilterIndexCreator( const IndexDescription & index) { diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index e4e9a670046..22f9215d563 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -167,8 +167,6 @@ public: MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - BloomFilterParameters params; /// Function for selecting next token. 
std::unique_ptr<ITokenExtractor> token_extractor; diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index f5136cc3764..818bae40067 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -91,11 +91,6 @@ MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCond query_info, storage_metadata->getConstraints(), index.granularity); } -bool MergeTreeIndexHypothesis::mayBenefitFromIndexForIn(const ASTPtr &) const -{ - return false; -} - MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index) { return std::make_shared<MergeTreeIndexHypothesis>(index); diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index e138559a1d0..1cd0e3daf27 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -75,8 +75,6 @@ public: MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - size_t max_rows = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 908fc98728e..5e2a034cb97 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -726,11 +726,6 @@ MergeTreeIndexConditionPtr MergeTreeIndexInverted::createIndexCondition( return std::make_shared<MergeTreeConditionInverted>(query, context, index.sample_block, params, token_extractor.get()); }; -bool MergeTreeIndexInverted::mayBenefitFromIndexForIn(const ASTPtr & node) const -{ - return std::find(std::cbegin(index.column_names), std::cend(index.column_names), node->getColumnName()) != std::cend(index.column_names); -} - MergeTreeIndexPtr invertedIndexCreator( const IndexDescription & index) { diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h index 86bf7613646..413cf206f0e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -171,8 +171,6 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store, const MergeTreeWriterSettings & /*settings*/) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - GinFilterParameters params; /// Function for selecting next token.
std::unique_ptr token_extractor; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index 35c52d6cd5e..2154c18f1a7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -211,21 +211,6 @@ MergeTreeIndexConditionPtr MergeTreeIndexMinMax::createIndexCondition( return std::make_shared(index, query, context); } -bool MergeTreeIndexMinMax::mayBenefitFromIndexForIn(const ASTPtr & node) const -{ - const String column_name = node->getColumnName(); - - for (const auto & cname : index.column_names) - if (column_name == cname) - return true; - - if (const auto * func = typeid_cast(node.get())) - if (func->arguments->children.size() == 1) - return mayBenefitFromIndexForIn(func->arguments->children.front()); - - return false; -} - MergeTreeIndexFormat MergeTreeIndexMinMax::getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & relative_path_prefix) const { if (data_part_storage.exists(relative_path_prefix + ".idx2")) diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 4517a195362..a1a216fdf72 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -81,8 +81,6 @@ public: MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - const char* getSerializedFileExtension() const override { return ".idx2"; } MergeTreeIndexFormat getDeserializedFormat(const IDataPartStorage & data_part_storage, const std::string & path_prefix) const override; /// NOLINT }; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index ec02d9418a7..612c5d868cb 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -709,11 +709,6 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition( return std::make_shared(index.name, index.sample_block, max_rows, query, context); } -bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const -{ - return false; -} - MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) { size_t max_rows = index.arguments[0].get(); diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 022988c2453..a53476ca751 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -151,8 +151,6 @@ public: MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; - size_t max_rows = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeIndexUSearch.h b/src/Storages/MergeTree/MergeTreeIndexUSearch.h index bf58928a577..a7675620a2e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexUSearch.h +++ b/src/Storages/MergeTree/MergeTreeIndexUSearch.h @@ -102,8 +102,6 @@ public: MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; - bool mayBenefitFromIndexForIn(const ASTPtr & /*node*/) const override { return false; } - private: const String distance_function; const unum::usearch::scalar_kind_t scalar_kind; diff 
--git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 77062f09be9..da1e914b90e 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -160,9 +160,6 @@ struct IMergeTreeIndex return {0 /*unknown*/, ""}; } - /// Checks whether the column is in data skipping index. - virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0; - virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0; virtual MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const = 0;
From 299fb35e834f1c2d20dbd830478dc2213193ea01 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 27 Dec 2023 18:09:09 +0000 Subject: [PATCH 51/88] Fix test --- .../02949_ttl_group_by_bug.reference | 709 ------------------ .../0_stateless/02949_ttl_group_by_bug.sql | 5 - 2 files changed, 714 deletions(-)
diff --git a/tests/queries/0_stateless/02949_ttl_group_by_bug.reference b/tests/queries/0_stateless/02949_ttl_group_by_bug.reference index 9782d63118b..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02949_ttl_group_by_bug.reference +++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.reference @@ -1,709 +0,0 @@
[709 deleted reference rows elided: lines of the form `<key> 2023-12-27 <HH:MM:SS> 0 0 0` for keys 0-4, covering timestamps 15:36:00 through 15:53:28, mostly at 5-second steps]
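Editor's note: the reference file is emptied because the test's final unbounded SELECT printed hundreds of rows whose timestamps come from the wall clock at run time, so its output was inherently unstable; the .sql change just below drops that SELECT and keeps only the order-violation probe (`WHERE cur < prev LIMIT 2`). A hedged sketch of that probe's shape, with column and window details assumed for illustration rather than copied from the test:

SELECT cur, prev
FROM
(
    SELECT
        ts AS cur,
        lagInFrame(ts) OVER (ORDER BY key, ts) AS prev  -- previous row's timestamp
    FROM ttl_group_by_bug
)
WHERE cur < prev           -- any hit means the sorting order is broken
LIMIT 2
SETTINGS max_threads = 1;  -- single thread keeps block order deterministic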
diff --git a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql index d1019946e2a..bae6bd95571 100644 --- a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql +++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql @@ -24,9 +24,4 @@ WHERE cur < prev LIMIT 2 SETTINGS max_threads = 1; --- I would like just to check whether the sorting order is not broken. -SELECT * -FROM ttl_group_by_bug -SETTINGS max_threads=1; - DROP TABLE IF EXISTS ttl_group_by_bug;
From 26b553c3b656994fc956d4b3c5d70adcb65e9d9a Mon Sep 17 00:00:00 2001 From: Pengyuan Bian Date: Wed, 27 Dec 2023 18:27:36 +0000 Subject: [PATCH 52/88] format. --- .../test.py | 41 ++++++++++++------- 1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/tests/integration/test_attach_table_from_s3_plain_readonly/test.py b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py index 52ae74e02f8..15ba934e621 100644 --- a/tests/integration/test_attach_table_from_s3_plain_readonly/test.py +++ b/tests/integration/test_attach_table_from_s3_plain_readonly/test.py @@ -2,10 +2,8 @@ import re import os import logging import pytest -import json from helpers.cluster import ClickHouseCluster -from helpers.test_tools import assert_eq_with_retry from minio.error import S3Error from pathlib import Path @@ -33,20 +31,25 @@ node2 = cluster.add_instance( uuid_regex = re.compile("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}") -def upload_to_minio(minio_client, bucket_name, local_path, minio_path=''): + +def upload_to_minio(minio_client, bucket_name, local_path, minio_path=""): local_path = Path(local_path) for root, _, files in os.walk(local_path): for file in files: local_file_path = Path(root) / file - minio_object_name = minio_path + str(local_file_path.relative_to(local_path)) + minio_object_name = minio_path + str( + local_file_path.relative_to(local_path) + ) try: - with open(local_file_path, 'rb') as data: + with open(local_file_path, "rb") as data: file_stat = os.stat(local_file_path) - minio_client.put_object(bucket_name, minio_object_name, data, file_stat.st_size) - logging.info(f'Uploaded {local_file_path} to {minio_object_name}') + minio_client.put_object( + bucket_name, minio_object_name, data, file_stat.st_size + ) + logging.info(f"Uploaded {local_file_path} to {minio_object_name}") except S3Error as e: - logging.error(f'Error uploading {local_file_path}: {e}') + logging.error(f"Error uploading {local_file_path}: {e}") @pytest.fixture(scope="module") def started_cluster(): try: @@ -62,7 +65,7 @@ def test_attach_table_from_s3_plain_readonly(started_cluster): # Create an atomic DB with mergetree sample data node1.query( - """ + """ create database local_db; create table local_db.test_table (num UInt32) engine=MergeTree() order by num; @@ -72,23 +75,31 @@ def test_attach_table_from_s3_plain_readonly(started_cluster): ) assert int(node1.query("select num from local_db.test_table limit 1")) == 5 - + # Copy local MergeTree data into minio bucket table_data_path =
os.path.join(node1.path, f"database/store") minio = cluster.minio_client - upload_to_minio(minio, cluster.minio_bucket, table_data_path, "data/disks/disk_s3_plain/store/") + upload_to_minio( + minio, cluster.minio_bucket, table_data_path, "data/disks/disk_s3_plain/store/" + ) # Drop the non-replicated table, we don't need it anymore - table_uuid = node1.query("SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'").strip() + table_uuid = node1.query( + "SELECT uuid FROM system.tables WHERE database='local_db' AND table='test_table'" + ).strip() node1.query("drop table local_db.test_table SYNC;") # Create a replicated database - node1.query("create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica1');") - node2.query("create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica2');") + node1.query( + "create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica1');" + ) + node2.query( + "create database s3_plain_test_db ENGINE = Replicated('/test/s3_plain_test_db', 'shard1', 'replica2');" + ) # Create a MergeTree table at one node, by attaching the merge tree data node1.query( - f""" + f""" attach table s3_plain_test_db.test_table UUID '{table_uuid}' (num UInt32) engine=MergeTree() order by num From f5bcfaffa593a6f42df1d8907eb2df5c6de7e5b8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 19:28:50 +0100 Subject: [PATCH 53/88] disable vertical merges with cleanup --- src/Storages/MergeTree/MergeTask.cpp | 8 +++- src/Storages/MergeTree/MergeTreeSettings.h | 3 +- src/Storages/StorageMergeTree.cpp | 4 ++ src/Storages/StorageReplicatedMergeTree.cpp | 7 ++- ...77_replacing_merge_tree_vertical_merge.sql | 4 +- ...ing_merge_tree_is_deleted_column.reference | 22 ++++++++++ ...replacing_merge_tree_is_deleted_column.sql | 44 ++++++++++++------- ...2861_replacing_merge_tree_with_cleanup.sql | 3 +- 8 files changed, 73 insertions(+), 22 deletions(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 6401427964b..18503ebec01 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -41,6 +41,7 @@ namespace ErrorCodes extern const int ABORTED; extern const int DIRECTORY_ALREADY_EXISTS; extern const int LOGICAL_ERROR; + extern const int SUPPORT_IS_DISABLED; } @@ -1005,10 +1006,13 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() break; case MergeTreeData::MergingParams::Replacing: + if (global_ctx->cleanup && !data_settings->allow_experimental_replacing_merge_with_cleanup) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); + merged_transform = std::make_shared( header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column, merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size, - (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup); + global_ctx->cleanup); break; case MergeTreeData::MergingParams::Graphite: @@ -1086,6 +1090,8 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm return MergeAlgorithm::Horizontal; if (global_ctx->future_part->part_format.storage_type != MergeTreeDataPartStorageType::Full) return MergeAlgorithm::Horizontal; + if (global_ctx->cleanup) + return MergeAlgorithm::Horizontal; if 
(!data_settings->allow_vertical_merges_from_compact_to_wide_parts) { diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ef41dfe2c98..eb6c14d7754 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -74,7 +74,6 @@ struct Settings; M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ M(UInt64, number_of_free_entries_in_pool_to_execute_optimize_entire_partition, 25, "When there is less than specified number of free entries in pool, do not try to execute optimize entire partition with a merge (this merge is created when set min_age_to_force_merge_seconds > 0 and min_age_to_force_merge_on_partition_only = true). This is to leave free threads for regular merges and avoid \"Too many parts\"", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ - M(CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never, "Is the Replicated Merge cleanup has to be done automatically at each merge or manually (possible values are 'Always'/'Never' (default))", 0) \ M(UInt64, replicated_max_mutations_in_one_entry, 10000, "Max number of mutation commands that can be merged together and executed in one MUTATE_PART entry (0 means unlimited)", 0) \ M(UInt64, number_of_mutations_to_delay, 500, "If table has at least that many unfinished mutations, artificially slow down mutations of table. Disabled if set to 0", 0) \ M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ @@ -193,6 +192,7 @@ struct Settings; M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \ + M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ @@ -233,6 +233,7 @@ struct Settings; MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_send_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, Seconds, replicated_fetches_http_receive_timeout, 0) \ MAKE_OBSOLETE_MERGE_TREE_SETTING(M, UInt64, replicated_max_parallel_fetches_for_host, DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) \ + MAKE_OBSOLETE_MERGE_TREE_SETTING(M, CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never) \ /// Settings that should not change after the creation of a table. 
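Editor's note: from this commit on, CLEANUP merges for ReplacingMergeTree are opt-in via the new per-table setting, the old clean_deleted_rows knob is retired as obsolete, and CLEANUP always falls back to the horizontal merge algorithm. The test updates below exercise the new contract; a minimal standalone illustration (the table name is hypothetical):

CREATE TABLE t_cleanup (uid String, version UInt32, is_deleted UInt8)
ENGINE = ReplacingMergeTree(version, is_deleted)
ORDER BY uid
SETTINGS allow_experimental_replacing_merge_with_cleanup = 1;

INSERT INTO t_cleanup VALUES ('d1', 1, 0), ('d1', 2, 1), ('d2', 1, 0);

-- Without the table setting this statement now throws SUPPORT_IS_DISABLED.
OPTIMIZE TABLE t_cleanup FINAL CLEANUP;

-- Only 'd2' survives: d1's newest version is a delete and CLEANUP dropped it.
SELECT * FROM t_cleanup ORDER BY uid;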
/// NOLINTNEXTLINE diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 3f4466699ea..e7ca50f4a5c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -62,6 +62,7 @@ namespace ErrorCodes extern const int UNKNOWN_POLICY; extern const int NO_SUCH_DATA_PART; extern const int ABORTED; + extern const int SUPPORT_IS_DISABLED; } namespace ActionLocks @@ -1530,6 +1531,9 @@ bool StorageMergeTree::optimize( throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); } + if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); + DataPartsVector data_parts = getVisibleDataPartsVector(local_context); std::unordered_set partition_ids; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index beccfe88d0e..c5d61c85e08 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3643,7 +3643,6 @@ void StorageReplicatedMergeTree::mergeSelectingTask() const auto storage_settings_ptr = getSettings(); const bool deduplicate = false; /// TODO: read deduplicate option from table config const Names deduplicate_by_columns = {}; - const bool cleanup = (storage_settings_ptr->clean_deleted_rows != CleanDeletedRows::Never); CreateMergeEntryResult create_result = CreateMergeEntryResult::Other; enum class AttemptStatus @@ -3727,7 +3726,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() future_merged_part->part_format, deduplicate, deduplicate_by_columns, - cleanup, + /*cleanup*/ false, nullptr, merge_pred->getVersion(), future_merged_part->merge_type); @@ -5637,7 +5636,11 @@ bool StorageReplicatedMergeTree::optimize( throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader"); if (cleanup) + { + if (!getSettings()->allow_experimental_replacing_merge_with_cleanup) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental merges with CLEANUP are not allowed"); LOG_DEBUG(log, "Cleanup the ReplicatedMergeTree."); + } auto handle_noop = [&](FormatStringHelper fmt_string, Args && ...args) { diff --git a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql index e3c1bb10426..871f96bb019 100644 --- a/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql +++ b/tests/queries/0_stateless/00577_replacing_merge_tree_vertical_merge.sql @@ -3,7 +3,7 @@ set optimize_on_insert = 0; drop table if exists tab_00577; create table tab_00577 (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date settings enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0, - min_bytes_for_wide_part = 0; + min_bytes_for_wide_part = 0, allow_experimental_replacing_merge_with_cleanup=1; insert into tab_00577 values ('2018-01-01', 2, 2), ('2018-01-01', 1, 1); insert into tab_00577 values ('2018-01-01', 0, 0); select * from tab_00577 order by version; @@ -16,7 +16,7 @@ DROP TABLE IF EXISTS testCleanupR1; CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) ORDER BY uid SETTINGS 
enable_vertical_merge_algorithm = 1, vertical_merge_algorithm_min_rows_to_activate = 0, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 0, - min_bytes_for_wide_part = 0; + min_bytes_for_wide_part = 0, allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1); INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1); diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference index 04a2b75bb4f..c897004b4e3 100644 --- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference +++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference @@ -91,6 +91,28 @@ d4 1 0 == (Replicas) Test settings == c2 1 0 c4 1 0 +no cleanup 1 d1 5 0 +no cleanup 1 d2 1 0 +no cleanup 1 d3 1 0 +no cleanup 1 d4 3 0 +no cleanup 1 d5 1 0 +no cleanup 2 d1 5 0 +no cleanup 2 d2 1 0 +no cleanup 2 d3 1 0 +no cleanup 2 d4 3 0 +no cleanup 2 d5 1 0 +no cleanup 2 d6 2 1 +no cleanup 3 d1 5 0 +no cleanup 3 d2 1 0 +no cleanup 3 d3 1 0 +no cleanup 3 d4 3 0 +no cleanup 3 d5 1 0 +no cleanup 4 d1 5 0 +no cleanup 4 d2 1 0 +no cleanup 4 d3 1 0 +no cleanup 4 d4 3 0 +no cleanup 4 d5 1 0 +no cleanup 4 d6 2 1 == Check cleanup & settings for other merge trees == d1 1 1 d1 1 1 diff --git a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql index 8549300d49f..80c18ae308b 100644 --- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql +++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql @@ -5,7 +5,7 @@ set allow_deprecated_syntax_for_merge_tree=0; -- Test the bahaviour without the is_deleted column DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid); +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... FINAL - no is_deleted =='; select * from test FINAL order by uid; @@ -13,7 +13,7 @@ OPTIMIZE TABLE test FINAL CLEANUP; select * from test order by uid; DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... 
FINAL - no is_deleted SETTINGS clean_deleted_rows=Always =='; select * from test FINAL order by uid; @@ -22,7 +22,7 @@ select * from test order by uid; -- Test the new behaviour DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); SELECT '== Test SELECT ... FINAL =='; select * from test FINAL order by uid; @@ -37,7 +37,7 @@ INSERT INTO test (*) VALUES ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, select * from test FINAL order by uid; DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; -- Expect d6 to be version=3 is_deleted=false INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); @@ -56,7 +56,7 @@ OPTIMIZE TABLE test FINAL CLEANUP; select * from test order by uid; DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; SELECT '== Test of the SETTINGS clean_deleted_rows as Always =='; INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); @@ -66,7 +66,7 @@ select * from test order by uid; OPTIMIZE TABLE test FINAL; -- d6 has to be removed since we set clean_deleted_rows as 'Always' -select * from test order by uid; +select * from test where is_deleted=0 order by uid; SELECT '== Test of the SETTINGS clean_deleted_rows as Never =='; ALTER TABLE test MODIFY SETTING clean_deleted_rows='Never'; @@ -80,7 +80,7 @@ DROP TABLE IF EXISTS testCleanupR1; CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted) - ORDER BY uid; + ORDER BY uid settings allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0); @@ -101,7 +101,7 @@ DROP TABLE IF EXISTS testSettingsR1; CREATE TABLE testSettingsR1 (col1 String, version UInt32, is_deleted UInt8) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_setting/', 'r1', version, is_deleted) ORDER BY col1 - SETTINGS clean_deleted_rows = 'Always'; + SETTINGS clean_deleted_rows = 'Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testSettingsR1 (*) VALUES ('c1', 1, 1),('c2', 1, 0),('c3', 1, 
1),('c4', 1, 0); SYSTEM SYNC REPLICA testSettingsR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet" @@ -110,13 +110,13 @@ OPTIMIZE TABLE testSettingsR1 FINAL; -- Only d3 to d5 remain SELECT '== (Replicas) Test settings =='; -SELECT * FROM testSettingsR1 order by col1; +SELECT * FROM testSettingsR1 where is_deleted=0 order by col1; ------------------------------ -- Check errors DROP TABLE IF EXISTS test; -CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) settings allow_experimental_replacing_merge_with_cleanup=1; -- is_deleted == 0/1 INSERT INTO test (*) VALUES ('d1', 1, 2); -- { serverError INCORRECT_DATA } @@ -125,35 +125,49 @@ DROP TABLE IF EXISTS test; -- checkis_deleted type CREATE TABLE test (uid String, version UInt32, is_deleted String) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); -- { serverError BAD_TYPE_OF_FIELD } +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +select 'no cleanup 1', * from test FINAL order by uid; +OPTIMIZE TABLE test FINAL CLEANUP; -- { serverError SUPPORT_IS_DISABLED } +select 'no cleanup 2', * from test order by uid; +DROP TABLE test; + +CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/no_cleanup/', 'r1', version, is_deleted) Order by (uid); +INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0); +select 'no cleanup 3', * from test FINAL order by uid; +OPTIMIZE TABLE test FINAL CLEANUP; -- { serverError SUPPORT_IS_DISABLED } +select 'no cleanup 4', * from test order by uid; +DROP TABLE test; + -- is_deleted column for other mergeTrees - ErrorCodes::LOGICAL_ERROR) -- Check clean_deleted_rows='Always' for other MergeTrees SELECT '== Check cleanup & settings for other merge trees =='; -CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = MergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = MergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testMT FINAL; SELECT * FROM testMT order by uid; -CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testSummingMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testSummingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testSummingMT FINAL; SELECT * FROM 
testSummingMT order by uid; -CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testAggregatingMT (*) VALUES ('d1', 1, 1); OPTIMIZE TABLE testAggregatingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testAggregatingMT FINAL; SELECT * FROM testAggregatingMT order by uid; -CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testCollapsingMT (*) VALUES ('d1', 1, 1, 1); OPTIMIZE TABLE testCollapsingMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testCollapsingMT FINAL; SELECT * FROM testCollapsingMT order by uid; -CREATE TABLE testVersionedCMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always'; +CREATE TABLE testVersionedCMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always', allow_experimental_replacing_merge_with_cleanup=1; INSERT INTO testVersionedCMT (*) VALUES ('d1', 1, 1, 1); OPTIMIZE TABLE testVersionedCMT FINAL CLEANUP; -- { serverError CANNOT_ASSIGN_OPTIMIZE } OPTIMIZE TABLE testVersionedCMT FINAL; diff --git a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql index 7b78e2900e7..4cd44a131e3 100644 --- a/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql +++ b/tests/queries/0_stateless/02861_replacing_merge_tree_with_cleanup.sql @@ -2,7 +2,8 @@ DROP TABLE IF EXISTS test; CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 0, min_rows_for_wide_part = 1, - min_bytes_for_wide_part = 1; + min_bytes_for_wide_part = 1, + allow_experimental_replacing_merge_with_cleanup=1; -- Expect d6 to be version=3 is_deleted=false INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0), ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0); From a3cba8e06f5fc5f5920f35b72f0f53459a20ce88 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 27 Dec 2023 20:27:15 +0100 Subject: [PATCH 54/88] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0089eeada00..f143a2ec78b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1835,9 +1835,13 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd return 
{}; } + size_t retries_count = 0; + constexpr size_t MAX_RETRIES_ON_SHUTDOWN = 3; while (true) { - if (shutdown_called || partial_shutdown_called) + /// It still makes sense to make a few attempts on shutdown because we already did some job to create a part + /// and also we want to reduce the probability of issues with unexpected parts on restart + if (++retries_count > MAX_RETRIES_ON_SHUTDOWN && (shutdown_called || partial_shutdown_called)) throw Exception(ErrorCodes::ABORTED, "Cannot commit part because shutdown called"); Coordination::Requests ops; From c762898adb2c509289f78d96c1716b8f1b0c7e5e Mon Sep 17 00:00:00 2001 From: koloshmet Date: Sun, 12 Feb 2023 21:17:55 +0200 Subject: [PATCH 55/88] refreshable materialized views --- src/Access/Common/AccessType.h | 1 + src/CMakeLists.txt | 1 + src/Common/CurrentMetrics.cpp | 1 + src/Common/IntervalKind.h | 2 + src/Interpreters/ActionLocksManager.cpp | 1 + src/Interpreters/Context.cpp | 4 + src/Interpreters/Context.h | 4 + src/Interpreters/InterpreterSystemQuery.cpp | 54 ++++ src/Interpreters/InterpreterSystemQuery.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 +- src/Parsers/ASTCreateQuery.h | 2 + src/Parsers/ASTRefreshStrategy.cpp | 74 +++++ src/Parsers/ASTRefreshStrategy.h | 36 +++ src/Parsers/ASTSystemQuery.h | 8 + src/Parsers/ASTTimeInterval.cpp | 40 +++ src/Parsers/ASTTimeInterval.h | 37 +++ src/Parsers/ParserCreateQuery.cpp | 14 +- src/Parsers/ParserRefreshStrategy.cpp | 77 +++++ src/Parsers/ParserRefreshStrategy.h | 16 + src/Parsers/ParserSystemQuery.cpp | 14 + src/Parsers/ParserTimeInterval.cpp | 71 +++++ src/Parsers/ParserTimeInterval.h | 24 ++ .../Executors/ManualPipelineExecutor.cpp | 55 ++++ .../Executors/ManualPipelineExecutor.h | 27 ++ src/QueryPipeline/QueryPipeline.h | 1 + .../MaterializedView/RefreshAllCombiner.cpp | 58 ++++ .../MaterializedView/RefreshAllCombiner.h | 33 ++ .../MaterializedView/RefreshDependencies.cpp | 60 ++++ .../MaterializedView/RefreshDependencies.h | 56 ++++ src/Storages/MaterializedView/RefreshSet.cpp | 128 ++++++++ src/Storages/MaterializedView/RefreshSet.h | 142 +++++++++ src/Storages/MaterializedView/RefreshTask.cpp | 292 ++++++++++++++++++ src/Storages/MaterializedView/RefreshTask.h | 150 +++++++++ .../MaterializedView/RefreshTask_fwd.h | 15 + .../MaterializedView/RefreshTimers.cpp | 243 +++++++++++++++ src/Storages/MaterializedView/RefreshTimers.h | 69 +++++ .../MaterializedView/tests/gtest_timers.cpp | 27 ++ src/Storages/StorageMaterializedView.cpp | 122 +++++++- src/Storages/StorageMaterializedView.h | 15 + .../System/StorageSystemViewRefreshes.cpp | 67 ++++ .../System/StorageSystemViewRefreshes.h | 27 ++ src/Storages/System/attachSystemTables.cpp | 2 + 42 files changed, 2065 insertions(+), 15 deletions(-) create mode 100644 src/Parsers/ASTRefreshStrategy.cpp create mode 100644 src/Parsers/ASTRefreshStrategy.h create mode 100644 src/Parsers/ASTTimeInterval.cpp create mode 100644 src/Parsers/ASTTimeInterval.h create mode 100644 src/Parsers/ParserRefreshStrategy.cpp create mode 100644 src/Parsers/ParserRefreshStrategy.h create mode 100644 src/Parsers/ParserTimeInterval.cpp create mode 100644 src/Parsers/ParserTimeInterval.h create mode 100644 src/Processors/Executors/ManualPipelineExecutor.cpp create mode 100644 src/Processors/Executors/ManualPipelineExecutor.h create mode 100644 src/Storages/MaterializedView/RefreshAllCombiner.cpp create mode 100644 src/Storages/MaterializedView/RefreshAllCombiner.h create mode 100644 src/Storages/MaterializedView/RefreshDependencies.cpp create mode 100644 
src/Storages/MaterializedView/RefreshDependencies.h create mode 100644 src/Storages/MaterializedView/RefreshSet.cpp create mode 100644 src/Storages/MaterializedView/RefreshSet.h create mode 100644 src/Storages/MaterializedView/RefreshTask.cpp create mode 100644 src/Storages/MaterializedView/RefreshTask.h create mode 100644 src/Storages/MaterializedView/RefreshTask_fwd.h create mode 100644 src/Storages/MaterializedView/RefreshTimers.cpp create mode 100644 src/Storages/MaterializedView/RefreshTimers.h create mode 100644 src/Storages/MaterializedView/tests/gtest_timers.cpp create mode 100644 src/Storages/System/StorageSystemViewRefreshes.cpp create mode 100644 src/Storages/System/StorageSystemViewRefreshes.h diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 45d427a7c55..1f0e678461f 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -177,6 +177,7 @@ enum class AccessType M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \ M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \ M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \ + M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, START VIEWS, STOP VIEWS, CANCEL VIEW, PAUSE VIEW, RESUME VIEW", VIEW, SYSTEM) \ M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6063c701708..86cb9acd056 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -226,6 +226,7 @@ add_object_library(clickhouse_storages_statistics Storages/Statistics) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_storages_windowview Storages/WindowView) add_object_library(clickhouse_storages_s3queue Storages/S3Queue) +add_object_library(clickhouse_storages_materializedview Storages/MaterializedView) add_object_library(clickhouse_client Client) add_object_library(clickhouse_bridge BridgeHelper) add_object_library(clickhouse_server Server) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 38b14e4b0b4..fccdeaa3c57 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -253,6 +253,7 @@ M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ + M(Refresh, "Number of active refreshes") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index 6893286f196..0f45d0ac169 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -71,6 +71,8 @@ struct IntervalKind /// Returns false if the conversion did not succeed. 
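Editor's note: the plumbing added so far in this patch (the SYSTEM_VIEWS access type, the Refresh metric, a comparable IntervalKind) backs the new refreshable materialized view grammar. A sketch of what that grammar accepts, inferred from ASTRefreshStrategy::formatImpl later in this patch; the names and the exact final syntax are assumptions, not a finished spec:

-- Assumed source table, to keep the example self-contained:
CREATE TABLE src (x UInt32) ENGINE = MergeTree ORDER BY x;

CREATE MATERIALIZED VIEW mv
REFRESH EVERY 1 DAY OFFSET 2 HOUR RANDOMIZE FOR 1 HOUR
ENGINE = MergeTree ORDER BY x
AS SELECT x, count() AS c FROM src GROUP BY x;

-- Interval form, per the AFTER branch: REFRESH AFTER 30 MINUTE [DEPENDS ON other_mv]

-- New control and introspection surface added by this patch
-- (system table name assumed from StorageSystemViewRefreshes):
SYSTEM REFRESH VIEW mv;
SELECT * FROM system.view_refreshes;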
/// For example, `IntervalKind::tryParseString('second', result)` returns `result` equals `IntervalKind::Kind::Second`. static bool tryParseString(const std::string & kind, IntervalKind::Kind & result); + + auto operator<=>(const IntervalKind & other) const { return kind <=> other.kind; } }; /// NOLINTNEXTLINE
diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index fb5ef4b98ae..43b49b024aa 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -18,6 +18,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove = 7; extern const StorageActionBlockType PullReplicationLog = 8; extern const StorageActionBlockType Cleanup = 9; + extern const StorageActionBlockType ViewRefresh = 10; }
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 25146ebc10d..fda22e4075e 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include @@ -289,6 +290,7 @@ struct ContextSharedPart : boost::noncopyable MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; + RefreshSet refresh_set; /// The list of active refreshes (for MaterializedView) ConfigurationPtr users_config TSA_GUARDED_BY(mutex); /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. @@ -825,6 +827,8 @@ MovesList & Context::getMovesList() { return shared->moves_list; } const MovesList & Context::getMovesList() const { return shared->moves_list; } ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; } const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; } +RefreshSet & Context::getRefreshSet() { return shared->refresh_set; } +const RefreshSet & Context::getRefreshSet() const { return shared->refresh_set; } String Context::resolveDatabase(const String & database_name) const {
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 39d2212ce80..b09eeb8ca2d 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -74,6 +74,7 @@ class BackgroundSchedulePool; class MergeList; class MovesList; class ReplicatedFetchList; +class RefreshSet; class Cluster; class Compiler; class MarkCache; @@ -922,6 +923,9 @@ public: ReplicatedFetchList & getReplicatedFetchList(); const ReplicatedFetchList & getReplicatedFetchList() const; + RefreshSet & getRefreshSet(); + const RefreshSet & getRefreshSet() const; + /// If the current session is expired at the time of the call, synchronously creates and returns a new session with the startNewSession() call. /// If no ZooKeeper configured, throws an exception.
std::shared_ptr getZooKeeper() const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index fc040e2af04..2f504e97857 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -108,6 +109,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove; extern const StorageActionBlockType PullReplicationLog; extern const StorageActionBlockType Cleanup; + extern const StorageActionBlockType ViewRefresh; } @@ -165,6 +167,8 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) return AccessType::SYSTEM_PULLING_REPLICATION_LOG; else if (action_type == ActionLocks::Cleanup) return AccessType::SYSTEM_CLEANUP; + else if (action_type == ActionLocks::ViewRefresh) + return AccessType::SYSTEM_VIEWS; else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } @@ -605,6 +609,30 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_CLEANUP: startStopAction(ActionLocks::Cleanup, true); break; + case Type::START_VIEWS: + startStopAction(ActionLocks::ViewRefresh, true); + break; + case Type::STOP_VIEWS: + startStopAction(ActionLocks::ViewRefresh, false); + break; + case Type::START_VIEW: + startStopAction(ActionLocks::ViewRefresh, true); + break; + case Type::STOP_VIEW: + startStopAction(ActionLocks::ViewRefresh, false); + break; + case Type::REFRESH_VIEW: + getRefreshTask()->run(); + break; + case Type::CANCEL_VIEW: + getRefreshTask()->cancel(); + break; + case Type::PAUSE_VIEW: + getRefreshTask()->pause(); + break; + case Type::RESUME_VIEW: + getRefreshTask()->resume(); + break; case Type::DROP_REPLICA: dropReplica(query); break; @@ -1092,6 +1120,17 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported"); } +RefreshTaskHolder InterpreterSystemQuery::getRefreshTask() +{ + auto ctx = getContext(); + ctx->checkAccess(AccessType::SYSTEM_VIEWS); + auto task = ctx->getRefreshSet().getTask(table_id); + if (!task) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Refreshable view {} doesn't exist", table_id.getNameForLogs()); + return task; +} + AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() const { @@ -1241,6 +1280,21 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_REPLICATION_QUEUES, query.getDatabase(), query.getTable()); break; } + case Type::REFRESH_VIEW: + case Type::START_VIEW: + case Type::START_VIEWS: + case Type::STOP_VIEW: + case Type::STOP_VIEWS: + case Type::CANCEL_VIEW: + case Type::PAUSE_VIEW: + case Type::RESUME_VIEW: + { + if (!query.table) + required_access.emplace_back(AccessType::SYSTEM_VIEWS); + else + required_access.emplace_back(AccessType::SYSTEM_VIEWS, query.getDatabase(), query.getTable()); + break; + } case Type::DROP_REPLICA: case Type::DROP_DATABASE_REPLICA: { diff --git a/src/Interpreters/InterpreterSystemQuery.h b/src/Interpreters/InterpreterSystemQuery.h index 462449623d0..89de7402b4d 100644 --- a/src/Interpreters/InterpreterSystemQuery.h +++ b/src/Interpreters/InterpreterSystemQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,8 @@ private: void flushDistributed(ASTSystemQuery & query); [[noreturn]] void restartDisk(String & name); + RefreshTaskHolder 
getRefreshTask(); + + AccessRightsElements getRequiredAccessForDDLOnCluster() const; void startStopAction(StorageActionBlockType action_type, bool start); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index 1562586bd93..9d5f0bcddbd 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -340,6 +339,12 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat formatOnCluster(settings); } + if (refresh_strategy) + { + settings.ostr << settings.nl_or_ws; + refresh_strategy->formatImpl(settings, state, frame); + } + if (to_table_id) { assert((is_materialized_view || is_window_view) && to_inner_uuid == UUIDHelpers::Nil); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 28f5e05802b..49a0140625c 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -116,6 +117,7 @@ public: ASTExpressionList * dictionary_attributes_list = nullptr; /// attributes of ASTDictionary * dictionary = nullptr; /// dictionary definition (layout, primary key, etc.) + ASTRefreshStrategy * refresh_strategy = nullptr; /// For CREATE MATERIALIZED VIEW ... REFRESH ... std::optional live_view_periodic_refresh; /// For CREATE LIVE VIEW ... WITH [PERIODIC] REFRESH ... bool is_watermark_strictly_ascending{false}; /// STRICTLY ASCENDING WATERMARK STRATEGY FOR WINDOW VIEW diff --git a/src/Parsers/ASTRefreshStrategy.cpp b/src/Parsers/ASTRefreshStrategy.cpp new file mode 100644 index 00000000000..f168bcc09c1 --- /dev/null +++ b/src/Parsers/ASTRefreshStrategy.cpp @@ -0,0 +1,74 @@ +#include + +#include + +namespace DB +{ + +ASTPtr ASTRefreshStrategy::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + + if (interval) + res->set(res->interval, interval->clone()); + if (period) + res->set(res->period, period->clone()); + if (periodic_offset) + res->set(res->periodic_offset, periodic_offset->clone()); + if (spread) + res->set(res->spread, spread->clone()); + if (settings) + res->set(res->settings, settings->clone()); + if (dependencies) + res->set(res->dependencies, dependencies->clone()); + res->schedule_kind = schedule_kind; + return res; +} + +void ASTRefreshStrategy::formatImpl( + const IAST::FormatSettings & f_settings, IAST::FormatState & state, IAST::FormatStateStacked frame) const +{ + frame.need_parens = false; + + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << "REFRESH "; + using enum ScheduleKind; + switch (schedule_kind) + { + case AFTER: + f_settings.ostr << "AFTER "; + interval->formatImpl(f_settings, state, frame); + break; + case EVERY: + f_settings.ostr << "EVERY "; + period->formatImpl(f_settings, state, frame); + if (periodic_offset) + { + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " OFFSET "; + periodic_offset->formatImpl(f_settings, state, frame); + } + break; + default: + break; + } + + if (spread) + { + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " RANDOMIZE FOR "; + spread->formatImpl(f_settings, state, frame); + } + if (dependencies) + { + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " DEPENDS ON "; + dependencies->formatImpl(f_settings, state, frame); + } + if (settings) + { + f_settings.ostr << (f_settings.hilite ? 
hilite_keyword : "") << " SETTINGS "; + settings->formatImpl(f_settings, state, frame); + } +} + +} diff --git a/src/Parsers/ASTRefreshStrategy.h b/src/Parsers/ASTRefreshStrategy.h new file mode 100644 index 00000000000..0df6a6e0e10 --- /dev/null +++ b/src/Parsers/ASTRefreshStrategy.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +/// Strategy for MATERIALIZED VIEW ... REFRESH .. +class ASTRefreshStrategy : public IAST +{ +public: + enum class ScheduleKind : UInt8 + { + UNKNOWN = 0, + AFTER, + EVERY + }; + + ASTSetQuery * settings = nullptr; + ASTExpressionList * dependencies = nullptr; + ASTTimeInterval * interval = nullptr; + ASTTimePeriod * period = nullptr; + ASTTimeInterval * periodic_offset = nullptr; + ASTTimePeriod * spread = nullptr; + ScheduleKind schedule_kind{ScheduleKind::UNKNOWN}; + + String getID(char) const override { return "Refresh strategy definition"; } + + ASTPtr clone() const override; + + void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 8e6100fe7b4..ec8e47f9513 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -90,6 +90,14 @@ public: STOP_CLEANUP, START_CLEANUP, RESET_COVERAGE, + REFRESH_VIEW, + START_VIEW, + START_VIEWS, + STOP_VIEW, + STOP_VIEWS, + CANCEL_VIEW, + PAUSE_VIEW, + RESUME_VIEW, END }; diff --git a/src/Parsers/ASTTimeInterval.cpp b/src/Parsers/ASTTimeInterval.cpp new file mode 100644 index 00000000000..4edda531202 --- /dev/null +++ b/src/Parsers/ASTTimeInterval.cpp @@ -0,0 +1,40 @@ +#include + +#include + +#include + +namespace DB +{ + +ASTPtr ASTTimePeriod::clone() const +{ + return std::make_shared(*this); +} + +void ASTTimePeriod::formatImpl(const FormatSettings & f_settings, FormatState &, FormatStateStacked frame) const +{ + frame.need_parens = false; + f_settings.ostr << (f_settings.hilite ? hilite_none : "") << value << ' '; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword(); +} + +ASTPtr ASTTimeInterval::clone() const +{ + return std::make_shared(*this); +} + +void ASTTimeInterval::formatImpl(const FormatSettings & f_settings, FormatState &, FormatStateStacked frame) const +{ + frame.need_parens = false; + + for (bool is_first = true; auto [kind, value] : kinds | std::views::reverse) + { + if (!std::exchange(is_first, false)) + f_settings.ostr << ' '; + f_settings.ostr << (f_settings.hilite ? hilite_none : "") << value << ' '; + f_settings.ostr << (f_settings.hilite ? 
hilite_keyword : "") << kind.toKeyword(); + } +} + +} diff --git a/src/Parsers/ASTTimeInterval.h b/src/Parsers/ASTTimeInterval.h new file mode 100644 index 00000000000..374d1e1ec55 --- /dev/null +++ b/src/Parsers/ASTTimeInterval.h @@ -0,0 +1,37 @@ + #pragma once + +#include + +#include + +#include + +namespace DB +{ + +/// Simple periodic time interval like 10 SECOND +class ASTTimePeriod : public IAST +{ +public: + UInt64 value{0}; + IntervalKind kind{IntervalKind::Second}; + + String getID(char) const override { return "TimePeriod"; } + + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +/// Compound time interval like 1 YEAR 3 DAY 15 MINUTE +class ASTTimeInterval : public IAST +{ +public: + std::map kinds; + + String getID(char) const override { return "TimeInterval"; } + + ASTPtr clone() const override; + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +}; + +} diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index f79850467e4..3921a0e37e7 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -1390,6 +1391,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec ASTPtr as_database; ASTPtr as_table; ASTPtr select; + ASTPtr refresh_strategy; String cluster_str; bool attach = false; @@ -1436,6 +1438,15 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } + if (ParserKeyword{"REFRESH"}.ignore(pos, expected)) + { + // REFRESH only with materialized views + if (!is_materialized_view) + return false; + if (!ParserRefreshStrategy{}.parse(pos, refresh_strategy, expected)) + return false; + } + if (is_materialized_view && ParserKeyword{"TO INNER UUID"}.ignore(pos, expected)) { ParserStringLiteral literal_p; @@ -1527,6 +1538,8 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->set(query->columns_list, columns_list); query->set(query->storage, storage); + if (refresh_strategy) + query->set(query->refresh_strategy, refresh_strategy); if (comment) query->set(query->comment, comment); @@ -1535,7 +1548,6 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->set(query->select, select); return true; - } bool ParserCreateNamedCollectionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) diff --git a/src/Parsers/ParserRefreshStrategy.cpp b/src/Parsers/ParserRefreshStrategy.cpp new file mode 100644 index 00000000000..a448556bd4f --- /dev/null +++ b/src/Parsers/ParserRefreshStrategy.cpp @@ -0,0 +1,77 @@ +#include + +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserRefreshStrategy::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto refresh = std::make_shared(); + + if (ParserKeyword{"AFTER"}.ignore(pos, expected)) + { + refresh->schedule_kind = ASTRefreshStrategy::ScheduleKind::AFTER; + ASTPtr interval; + if (!ParserTimeInterval{}.parse(pos, interval, expected)) + return false; + + refresh->set(refresh->interval, interval); + } + else if (ParserKeyword{"EVERY"}.ignore(pos, expected)) + { + refresh->schedule_kind = ASTRefreshStrategy::ScheduleKind::EVERY; + ASTPtr period; + ASTPtr periodic_offset; + if (!ParserTimePeriod{}.parse(pos, period, expected)) + return false; + if 
(ParserKeyword{"OFFSET"}.ignore(pos, expected)) + { + if (!ParserTimeInterval{}.parse(pos, periodic_offset, expected)) + return false; + } + + refresh->set(refresh->period, period); + if (periodic_offset) + refresh->set(refresh->periodic_offset, periodic_offset); + } + if (refresh->schedule_kind == ASTRefreshStrategy::ScheduleKind::UNKNOWN) + return false; + + if (ParserKeyword{"RANDOMIZE FOR"}.ignore(pos, expected)) + { + ASTPtr spread; + if (!ParserTimePeriod{}.parse(pos, spread, expected)) + return false; + + refresh->set(refresh->spread, spread); + } + + if (ParserKeyword{"DEPENDS ON"}.ignore(pos, expected)) + { + ASTPtr dependencies; + auto list_parser = ParserList{ + std::make_unique(), + std::make_unique(TokenType::Comma), + /* allow_empty= */ false}; + if (!list_parser.parse(pos, dependencies, expected)) + return false; + refresh->set(refresh->dependencies, dependencies); + } + + // Refresh SETTINGS + if (ParserKeyword{"SETTINGS"}.ignore(pos, expected)) + { + /// Settings are written like SET query, so parse them with ParserSetQuery + ASTPtr settings; + if (!ParserSetQuery{true}.parse(pos, settings, expected)) + return false; + refresh->set(refresh->settings, settings); + } + node = refresh; + return true; +} + +} diff --git a/src/Parsers/ParserRefreshStrategy.h b/src/Parsers/ParserRefreshStrategy.h new file mode 100644 index 00000000000..e9edabd7129 --- /dev/null +++ b/src/Parsers/ParserRefreshStrategy.h @@ -0,0 +1,16 @@ +#pragma once + +#include + +namespace DB +{ + +/// Parser for ASTRefreshStrategy +class ParserRefreshStrategy : public IParserBase +{ +protected: + const char * getName() const override { return "refresh strategy"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 2e1283187d3..9115d195904 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -388,6 +388,20 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); break; + case Type::REFRESH_VIEW: + case Type::START_VIEW: + case Type::STOP_VIEW: + case Type::CANCEL_VIEW: + case Type::PAUSE_VIEW: + case Type::RESUME_VIEW: + if (!parseDatabaseAndTableAsAST(pos, expected, res->database, res->table)) + return false; + break; + + case Type::START_VIEWS: + case Type::STOP_VIEWS: + break; + case Type::SUSPEND: { if (!parseQueryWithOnCluster(res, pos, expected)) diff --git a/src/Parsers/ParserTimeInterval.cpp b/src/Parsers/ParserTimeInterval.cpp new file mode 100644 index 00000000000..dac66883083 --- /dev/null +++ b/src/Parsers/ParserTimeInterval.cpp @@ -0,0 +1,71 @@ +#include + +#include +#include + +#include +#include + +namespace DB +{ + +namespace +{ + +struct ValKind +{ + UInt64 val; + IntervalKind kind; + bool empty; +}; + +std::optional parseValKind(IParser::Pos & pos, Expected & expected) +{ + ASTPtr value; + IntervalKind kind; + if (!ParserNumber{}.parse(pos, value, expected)) + return ValKind{ .empty = true }; + if (!parseIntervalKind(pos, expected, kind)) + return {}; + return ValKind{ value->as().value.safeGet(), kind, false }; +} + +} + +bool ParserTimePeriod::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto parsed = parseValKind(pos, expected); + + if (!parsed || parsed->empty || parsed->val == 0) + return false; + + auto time_period = std::make_shared(); + time_period->value = parsed->val; + time_period->kind = parsed->kind; + + node = time_period; + return true; +} + +bool ParserTimeInterval::parseImpl(Pos & pos, ASTPtr 
& node, Expected & expected) +{ + auto time_interval = std::make_shared(); + + auto parsed = parseValKind(pos, expected); + while (parsed && !parsed->empty) + { + if (parsed->val == 0) + return false; + auto [it, inserted] = time_interval->kinds.emplace(parsed->kind, parsed->val); + if (!inserted) + return false; + parsed = parseValKind(pos, expected); + } + + if (!parsed || time_interval->kinds.empty()) + return false; + node = time_interval; + return true; +} + +} diff --git a/src/Parsers/ParserTimeInterval.h b/src/Parsers/ParserTimeInterval.h new file mode 100644 index 00000000000..6eae1fa4133 --- /dev/null +++ b/src/Parsers/ParserTimeInterval.h @@ -0,0 +1,24 @@ +#pragma once + +#include + +namespace DB +{ + +/// Parser for ASTTimePeriod +class ParserTimePeriod : public IParserBase +{ +protected: + const char * getName() const override { return "time period"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +/// Parser for ASTTimeInterval +class ParserTimeInterval : public IParserBase +{ +protected: + const char * getName() const override { return "time interval"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Processors/Executors/ManualPipelineExecutor.cpp b/src/Processors/Executors/ManualPipelineExecutor.cpp new file mode 100644 index 00000000000..f3ac5028b77 --- /dev/null +++ b/src/Processors/Executors/ManualPipelineExecutor.cpp @@ -0,0 +1,55 @@ +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +namespace +{ + +QueryPipeline & validatePipeline(QueryPipeline & query_pipeline) +{ + if (!query_pipeline.completed()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for ManualPipelineExecutor must be completed"); + return query_pipeline; +} + +} + +ManualPipelineExecutor::ManualPipelineExecutor(QueryPipeline & query_pipeline) + : pipeline{&validatePipeline(query_pipeline)} + , executor(pipeline->processors, pipeline->process_list_element) +{ + executor.setReadProgressCallback(pipeline->getReadProgressCallback()); +} + +ManualPipelineExecutor::~ManualPipelineExecutor() +{ + try + { + executor.cancel(); + } + catch (...) 
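+ // executor.cancel() can throw; a destructor must not, so the failure is logged and swallowed below.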
+ { + tryLogCurrentException("ManualPipelineExecutor"); + } +} + +bool ManualPipelineExecutor::executeStep() +{ + return executor.executeStep(); +} + +bool ManualPipelineExecutor::executeStep(std::atomic_bool & yield_flag) +{ + return executor.executeStep(&yield_flag); +} + +} diff --git a/src/Processors/Executors/ManualPipelineExecutor.h b/src/Processors/Executors/ManualPipelineExecutor.h new file mode 100644 index 00000000000..2fc441609b7 --- /dev/null +++ b/src/Processors/Executors/ManualPipelineExecutor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class QueryPipeline; + +/// Simple executor for step by step execution of completed QueryPipeline +class ManualPipelineExecutor +{ +public: + explicit ManualPipelineExecutor(QueryPipeline & query_pipeline); + ~ManualPipelineExecutor(); + + bool executeStep(); + bool executeStep(std::atomic_bool & yield_flag); + +private: + QueryPipeline * pipeline; + PipelineExecutor executor; +}; + +} diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index f14cf61aac2..326d2721d9f 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -167,6 +167,7 @@ private: friend class PushingAsyncPipelineExecutor; friend class PullingAsyncPipelineExecutor; friend class CompletedPipelineExecutor; + friend class ManualPipelineExecutor; friend class QueryPipelineBuilder; }; diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.cpp b/src/Storages/MaterializedView/RefreshAllCombiner.cpp new file mode 100644 index 00000000000..5cb06ade9c7 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshAllCombiner.cpp @@ -0,0 +1,58 @@ +#include + +#include + +namespace DB +{ + +RefreshAllCombiner::RefreshAllCombiner() + : time_arrived{false} +{} + +RefreshAllCombiner::RefreshAllCombiner(const std::vector & parents) + : time_arrived{false} +{ + parents_arrived.reserve(parents.size()); + for (auto && parent : parents) + parents_arrived.emplace(parent.uuid, false); +} + +bool RefreshAllCombiner::arriveTime() +{ + std::lock_guard lock(combiner_mutex); + time_arrived = true; + return allArrivedLocked(); +} + +bool RefreshAllCombiner::arriveParent(const StorageID & id) +{ + std::lock_guard lock(combiner_mutex); + parents_arrived[id.uuid] = true; + return allArrivedLocked(); +} + +void RefreshAllCombiner::flush() +{ + std::lock_guard lock(combiner_mutex); + flushLocked(); +} + +bool RefreshAllCombiner::allArrivedLocked() +{ + auto is_value = [](auto && key_value) { return key_value.second; }; + if (time_arrived && std::ranges::all_of(parents_arrived, is_value)) + { + flushLocked(); + return true; + } + return false; +} + +void RefreshAllCombiner::flushLocked() +{ + for (auto & [parent, arrived] : parents_arrived) + arrived = false; + time_arrived = false; +} + +} diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.h b/src/Storages/MaterializedView/RefreshAllCombiner.h new file mode 100644 index 00000000000..f4faf073ae4 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshAllCombiner.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +namespace DB +{ + +/// Concurrent primitive for dependency completeness registration +/// When arrive methods return true, dependant task must be executed (or scheduled) +class RefreshAllCombiner +{ +public: + RefreshAllCombiner(); + + explicit RefreshAllCombiner(const std::vector & parents); + + bool arriveTime(); + + bool arriveParent(const StorageID & id); + + void flush(); + +private: + bool allArrivedLocked(); + + void flushLocked(); + + 
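/// Guards parents_arrived and time_arrived: arriveTime() and arriveParent() can be called concurrently, from this view's own schedule and from finishing parent views. + 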
std::mutex combiner_mutex; + std::unordered_map parents_arrived; + bool time_arrived; +}; + +} diff --git a/src/Storages/MaterializedView/RefreshDependencies.cpp b/src/Storages/MaterializedView/RefreshDependencies.cpp new file mode 100644 index 00000000000..f1a834a6b3a --- /dev/null +++ b/src/Storages/MaterializedView/RefreshDependencies.cpp @@ -0,0 +1,60 @@ +#include + +#include + +namespace DB +{ + +RefreshDependencies::Entry::Entry(RefreshDependencies & deps, ContainerIter it) + : dependencies{&deps} + , entry_it{it} +{} + +RefreshDependencies::Entry::Entry(Entry && other) noexcept + : dependencies(std::exchange(other.dependencies, nullptr)) + , entry_it(std::move(other.entry_it)) +{} + +RefreshDependencies::Entry & RefreshDependencies::Entry::operator=(Entry && other) noexcept +{ + if (this == &other) + return *this; + cleanup(std::exchange(dependencies, std::exchange(other.dependencies, nullptr))); + entry_it = std::move(other.entry_it); + return *this; +} + +RefreshDependencies::Entry::~Entry() +{ + cleanup(dependencies); +} + +void RefreshDependencies::Entry::cleanup(RefreshDependencies * deps) +{ + if (deps) + deps->erase(entry_it); +} + +RefreshDependenciesEntry RefreshDependencies::add(RefreshTaskHolder dependency) +{ + std::lock_guard lock(dependencies_mutex); + return Entry(*this, dependencies.emplace(dependencies.end(), dependency)); +} + +void RefreshDependencies::notifyAll(const StorageID & id) +{ + std::lock_guard lock(dependencies_mutex); + for (auto && dep : dependencies) + { + if (auto task = dep.lock()) + task->notify(id); + } +} + +void RefreshDependencies::erase(ContainerIter it) +{ + std::lock_guard lock(dependencies_mutex); + dependencies.erase(it); +} + +} diff --git a/src/Storages/MaterializedView/RefreshDependencies.h b/src/Storages/MaterializedView/RefreshDependencies.h new file mode 100644 index 00000000000..a4488053adf --- /dev/null +++ b/src/Storages/MaterializedView/RefreshDependencies.h @@ -0,0 +1,56 @@ +#pragma once + +#include + +#include + +#include + + +namespace DB +{ + +class RefreshTask; + +/// Concurrent primitive for managing list of dependant task and notifying them +class RefreshDependencies +{ + using Container = std::list; + using ContainerIter = typename Container::iterator; + +public: + class Entry + { + friend class RefreshDependencies; + + public: + Entry(Entry &&) noexcept; + Entry & operator=(Entry &&) noexcept; + + ~Entry(); + + private: + Entry(RefreshDependencies & deps, ContainerIter it); + + void cleanup(RefreshDependencies * deps); + + RefreshDependencies * dependencies; + ContainerIter entry_it; + }; + + RefreshDependencies() = default; + + Entry add(RefreshTaskHolder dependency); + + void notifyAll(const StorageID & id); + +private: + void erase(ContainerIter it); + + std::mutex dependencies_mutex; + std::list dependencies; +}; + +using RefreshDependenciesEntry = RefreshDependencies::Entry; + +} diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp new file mode 100644 index 00000000000..c38d3408495 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -0,0 +1,128 @@ +#include +#include + +namespace DB +{ + +RefreshSetElement::RefreshSetElement(RefreshTaskHolder task, StorageID id) + : corresponding_task(task) + , view_id(std::move(id)) +{} + +RefreshInfo RefreshSetElement::getInfo() const +{ + return { + .database = view_id.getDatabaseName(), + .view_name = view_id.getTableName(), + .refresh_status = toString(RefreshTask::TaskState{state.load()}), + 
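// Each field is an independent relaxed load, so the returned row is a best-effort snapshot rather than an atomic one. + 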
.last_refresh_status = toString(RefreshTask::LastTaskState{last_state.load()}), + .last_refresh_time = static_cast(last_s.load(std::memory_order_relaxed)), + .next_refresh_time = static_cast(next_s.load(std::memory_order_relaxed)), + .progress = static_cast(written_rows) / total_rows_to_read, + .elapsed_ns = elapsed_ns / 1e9, + .read_rows = read_rows.load(std::memory_order_relaxed), + .read_bytes = read_bytes.load(std::memory_order_relaxed), + .total_rows_to_read = total_rows_to_read.load(std::memory_order_relaxed), + .total_bytes_to_read = total_bytes_to_read.load(std::memory_order_relaxed), + .written_rows = written_rows.load(std::memory_order_relaxed), + .written_bytes = written_bytes.load(std::memory_order_relaxed), + .result_rows = result_rows.load(std::memory_order_relaxed), + .result_bytes = result_bytes.load(std::memory_order_relaxed) + }; +} + +const StorageID & RefreshSetElement::getID() const +{ + return view_id; +} + +RefreshTaskHolder RefreshSetElement::getTask() const +{ + return corresponding_task.lock(); +} + +bool RefreshSetLess::operator()(const RefreshSetElement & l, const RefreshSetElement & r) const +{ + return l.getID().uuid < r.getID().uuid; +} + +bool RefreshSetLess::operator()(const StorageID & l, const RefreshSetElement & r) const +{ + return l.uuid < r.getID().uuid; +} + +bool RefreshSetLess::operator()(const RefreshSetElement & l, const StorageID & r) const +{ + return l.getID().uuid < r.uuid; +} + +bool RefreshSetLess::operator()(const StorageID & l, const StorageID & r) const +{ + return l.uuid < r.uuid; +} + +RefreshSet::Entry::Entry() + : parent_set{nullptr} + , metric_increment{} +{} + +RefreshSet::Entry::Entry(Entry && other) noexcept + : parent_set{std::exchange(other.parent_set, nullptr)} + , iter(std::move(other.iter)) + , metric_increment(std::move(other.metric_increment)) +{} + +RefreshSet::Entry & RefreshSet::Entry::operator=(Entry && other) noexcept +{ + if (this == &other) + return *this; + cleanup(std::exchange(parent_set, std::exchange(other.parent_set, nullptr))); + iter = std::move(other.iter); + metric_increment = std::move(other.metric_increment); + return *this; +} + +RefreshSet::Entry::~Entry() +{ + cleanup(parent_set); +} + +RefreshSet::Entry::Entry(RefreshSet & set, ContainerIter it, const CurrentMetrics::Metric & metric) + : parent_set{&set}, iter(std::move(it)), metric_increment(metric) +{} + +void RefreshSet::Entry::cleanup(RefreshSet * set) +{ + if (set) + set->erase(iter); +} + +RefreshSet::RefreshSet() + : set_metric(CurrentMetrics::Refresh) +{} + +RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const +{ + std::lock_guard lock(elements_mutex); + if (auto element = elements.find(id); element != elements.end()) + return element->getTask(); + return nullptr; +} + +RefreshSet::InfoContainer RefreshSet::getInfo() const +{ + std::lock_guard lock(elements_mutex); + InfoContainer res; + res.reserve(elements.size()); + for (auto && element : elements) + res.emplace_back(element.getInfo()); + return res; +} + +void RefreshSet::erase(ContainerIter it) +{ + std::lock_guard lock(elements_mutex); + elements.erase(it); +} + +} diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h new file mode 100644 index 00000000000..48962d8c96d --- /dev/null +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -0,0 +1,142 @@ +#pragma once + +#include +#include +#include + +#include + +namespace CurrentMetrics +{ + extern const Metric Refresh; +} + +namespace DB +{ + +struct RefreshInfo +{ + String 
database; + String view_name; + String refresh_status; + String last_refresh_status; + UInt32 last_refresh_time; + UInt32 next_refresh_time; + Float64 progress; + Float64 elapsed_ns; + UInt64 read_rows; + UInt64 read_bytes; + UInt64 total_rows_to_read; + UInt64 total_bytes_to_read; + UInt64 written_rows; + UInt64 written_bytes; + UInt64 result_rows; + UInt64 result_bytes; +}; + +class RefreshSetElement +{ + friend class RefreshTask; +public: + RefreshSetElement(RefreshTaskHolder task, StorageID id); + + RefreshSetElement(const RefreshSetElement &) = delete; + RefreshSetElement & operator=(const RefreshSetElement &) = delete; + + RefreshInfo getInfo() const; + + RefreshTaskHolder getTask() const; + + const StorageID & getID() const; + +private: + RefreshTaskObserver corresponding_task; + StorageID view_id; + + mutable std::atomic read_rows{0}; + mutable std::atomic read_bytes{0}; + mutable std::atomic total_rows_to_read{0}; + mutable std::atomic total_bytes_to_read{0}; + mutable std::atomic written_rows{0}; + mutable std::atomic written_bytes{0}; + mutable std::atomic result_rows{0}; + mutable std::atomic result_bytes{0}; + mutable std::atomic elapsed_ns{0}; + mutable std::atomic last_s{0}; + mutable std::atomic next_s{0}; + mutable std::atomic state{0}; + mutable std::atomic last_state{0}; +}; + +struct RefreshSetLess +{ + using is_transparent = std::true_type; + + bool operator()(const RefreshSetElement & l, const RefreshSetElement & r) const; + bool operator()(const StorageID & l, const RefreshSetElement & r) const; + bool operator()(const RefreshSetElement & l, const StorageID & r) const; + bool operator()(const StorageID & l, const StorageID & r) const; +}; + +/// Set of refreshable views +class RefreshSet +{ +private: + using Container = std::set; + using ContainerIter = typename Container::iterator; + +public: + class Entry + { + friend class RefreshSet; + public: + Entry(); + + Entry(Entry &&) noexcept; + Entry & operator=(Entry &&) noexcept; + + ~Entry(); + + const RefreshSetElement * operator->() const { return std::to_address(iter); } + + private: + RefreshSet * parent_set; + ContainerIter iter; + std::optional metric_increment; + + Entry( + RefreshSet & set, + ContainerIter it, + const CurrentMetrics::Metric & metric); + + void cleanup(RefreshSet * set); + }; + + using InfoContainer = std::vector; + + RefreshSet(); + + template + std::optional emplace(Args &&... 
args) + { + std::lock_guard guard(elements_mutex); + if (auto [it, is_inserted] = elements.emplace(std::forward(args)...); is_inserted) + return Entry(*this, std::move(it), set_metric); + return {}; + } + + RefreshTaskHolder getTask(const StorageID & id) const; + + InfoContainer getInfo() const; + +private: + mutable std::mutex elements_mutex; + Container elements; + CurrentMetrics::Metric set_metric; + + void erase(ContainerIter it); +}; + +using RefreshSetEntry = RefreshSet::Entry; + +} diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp new file mode 100644 index 00000000000..579d3252865 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -0,0 +1,292 @@ +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +std::uniform_int_distribution makeSpreadDistribution(const ASTTimePeriod * spread) +{ + if (!spread) + return std::uniform_int_distribution(0, 0); + Int64 limit = spread->kind.toAvgSeconds() * spread->value / 2; + return std::uniform_int_distribution(-limit, limit); +} + +std::variant makeRefreshTimer(const ASTRefreshStrategy & strategy) +{ + using enum ASTRefreshStrategy::ScheduleKind; + switch (strategy.schedule_kind) + { + case EVERY: + return RefreshEveryTimer{*strategy.period, strategy.interval}; + case AFTER: + return RefreshAfterTimer{strategy.interval}; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown refresh strategy kind"); + } +} + +} + +RefreshTask::RefreshTask( + const ASTRefreshStrategy & strategy) + : refresh_timer(makeRefreshTimer(strategy)) + , refresh_spread{makeSpreadDistribution(strategy.spread)} + , refresh_immediately{false} + , interrupt_execution{false} + , canceled{false} +{} + +RefreshTaskHolder RefreshTask::create( + const StorageMaterializedView & view, + ContextMutablePtr context, + const DB::ASTRefreshStrategy & strategy) +{ + auto task = std::make_shared(strategy); + + task->refresh_task = context->getSchedulePool().createTask("MaterializedViewRefresherTask", task->makePoolTask()); + task->set_entry = context->getRefreshSet().emplace(task, view.getStorageID()).value(); + if (strategy.dependencies) + { + if (strategy.schedule_kind != ASTRefreshStrategy::ScheduleKind::AFTER) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dependencies are allowed only for AFTER refresh kind"); + + task->deps_entries.reserve(strategy.dependencies->children.size()); + for (auto && dependency : strategy.dependencies->children) + { + StorageID dep_id(dependency->as()); + if (auto dep_task = context->getRefreshSet().getTask(dep_id)) + task->deps_entries.push_back(dep_task->dependencies.add(task)); + } + } + + return task; +} + +void RefreshTask::initialize(std::shared_ptr view) +{ + view_to_refresh = view; +} + +void RefreshTask::start() +{ + storeState(TaskState::Scheduled); + refresh_task->activateAndSchedule(); +} + +void RefreshTask::stop() +{ + refresh_task->deactivate(); + cancel(); + storeState(TaskState::Disabled); +} + +void RefreshTask::run() +{ + refresh_immediately.store(true); + refresh_task->activateAndSchedule(); +} + +void RefreshTask::cancel() +{ + canceled.store(true); + interrupt_execution.store(true); +} + +void RefreshTask::pause() +{ + interrupt_execution.store(true); +} + +void RefreshTask::resume() +{ + interrupt_execution.store(false); + refresh_immediately.store(true); + refresh_task->schedule(); +} + +void RefreshTask::notify(const StorageID & parent_id) +{ 
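+    /// A parent listed in DEPENDS ON has finished refreshing; once the timer and all parents have arrived, trigger this view's refresh immediately.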
+ if (combiner.arriveParent(parent_id)) + { + refresh_immediately.store(true); + refresh_task->schedule(); + } +} + +void RefreshTask::doRefresh() +{ + if (refresh_immediately.exchange(false)) + { + refresh(); + } + else + { + auto now = std::chrono::system_clock::now(); + if (now >= next_refresh) + { + if (combiner.arriveTime()) + refresh(); + } + else + scheduleRefresh(now); + } +} + +void RefreshTask::refresh() +{ + auto view = lockView(); + if (!view) + return; + + if (!refresh_executor) + initializeRefresh(view); + + storeState(TaskState::Running); + + switch (executeRefresh()) + { + case ExecutionResult::Paused: + storeState(TaskState::Paused); + return; + case ExecutionResult::Finished: + completeRefresh(view); + storeLastState(LastTaskState::Finished); + break; + case ExecutionResult::Cancelled: + storeLastState(LastTaskState::Canceled); + break; + } + + refresh_executor.reset(); + refresh_block.reset(); + + storeLastRefresh(std::chrono::system_clock::now()); + scheduleRefresh(last_refresh); +} + +RefreshTask::ExecutionResult RefreshTask::executeRefresh() +{ + bool not_finished{true}; + while (!interrupt_execution.load() && not_finished) + not_finished = refresh_executor->executeStep(interrupt_execution); + + if (!not_finished) + return ExecutionResult::Finished; + if (interrupt_execution.load() && !canceled.load()) + return ExecutionResult::Paused; + return ExecutionResult::Cancelled; + +} + +void RefreshTask::initializeRefresh(std::shared_ptr view) +{ + refresh_query = view->prepareRefreshQuery(); + auto refresh_context = Context::createCopy(view->getContext()); + refresh_block = InterpreterInsertQuery(refresh_query, refresh_context).execute(); + refresh_block->pipeline.setProgressCallback([this](const Progress & progress){ progressCallback(progress); }); + + canceled.store(false); + interrupt_execution.store(false); + + refresh_executor.emplace(refresh_block->pipeline); +} + +void RefreshTask::completeRefresh(std::shared_ptr view) +{ + view->updateInnerTableAfterRefresh(refresh_query); + dependencies.notifyAll(view->getStorageID()); +} + +void RefreshTask::scheduleRefresh(std::chrono::system_clock::time_point now) +{ + using namespace std::chrono_literals; + auto scheduled_refresh = calculateRefreshTime(now) + genSpreadSeconds(); + storeNextRefresh(scheduled_refresh); + auto schedule_time = std::chrono::ceil(scheduled_refresh - now); + storeState(TaskState::Scheduled); + refresh_task->scheduleAfter(std::max(schedule_time, 0ms).count()); +} + +namespace +{ + +template +struct CombinedVisitor : Ts... { using Ts::operator()...; }; +template +CombinedVisitor(Ts...) 
-> CombinedVisitor; + +} + +std::chrono::sys_seconds RefreshTask::calculateRefreshTime(std::chrono::system_clock::time_point now) const +{ + CombinedVisitor refresh_time_visitor{ + [now](const RefreshAfterTimer & timer) { return timer.after(now); }, + [now](const RefreshEveryTimer & timer) { return timer.next(now); }}; + return std::visit(std::move(refresh_time_visitor), refresh_timer); +} + +std::chrono::seconds RefreshTask::genSpreadSeconds() +{ + return std::chrono::seconds{refresh_spread(thread_local_rng)}; +} + +void RefreshTask::progressCallback(const Progress & progress) +{ + set_entry->read_rows.store(progress.read_rows, std::memory_order_relaxed); + set_entry->read_bytes.store(progress.read_bytes, std::memory_order_relaxed); + set_entry->total_rows_to_read.store(progress.total_rows_to_read, std::memory_order_relaxed); + set_entry->total_bytes_to_read.store(progress.total_bytes_to_read, std::memory_order_relaxed); + set_entry->written_rows.store(progress.written_rows, std::memory_order_relaxed); + set_entry->written_bytes.store(progress.written_bytes, std::memory_order_relaxed); + set_entry->result_rows.store(progress.result_rows, std::memory_order_relaxed); + set_entry->result_bytes.store(progress.result_bytes, std::memory_order_relaxed); + set_entry->elapsed_ns.store(progress.elapsed_ns, std::memory_order_relaxed); +} + +std::shared_ptr RefreshTask::lockView() +{ + return std::static_pointer_cast(view_to_refresh.lock()); +} + +void RefreshTask::storeState(TaskState task_state) +{ + state.store(task_state); + set_entry->state.store(static_cast(task_state)); +} + +void RefreshTask::storeLastState(LastTaskState task_state) +{ + last_state = task_state; + set_entry->last_state.store(static_cast(task_state)); +} + +void RefreshTask::storeLastRefresh(std::chrono::system_clock::time_point last) +{ + last_refresh = last; + auto secs = std::chrono::floor(last); + set_entry->last_s.store(secs.time_since_epoch().count()); +} + +void RefreshTask::storeNextRefresh(std::chrono::system_clock::time_point next) +{ + next_refresh = next; + auto secs = std::chrono::floor(next); + set_entry->next_s.store(secs.time_since_epoch().count()); +} + +} diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h new file mode 100644 index 00000000000..5e9bb618372 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -0,0 +1,150 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +#include + + +namespace DB +{ + +class StorageMaterializedView; +class ASTRefreshStrategy; + +class RefreshTask : public std::enable_shared_from_this +{ +public: + enum class TaskState : RefreshTaskStateUnderlying + { + Disabled = 0, + Scheduled, + Running, + Paused + }; + + enum class LastTaskState : RefreshTaskStateUnderlying + { + Unknown = 0, + Canceled, + Finished + }; + + /// Never call it manual, public for shared_ptr construction only + RefreshTask(const ASTRefreshStrategy & strategy); + + /// The only proper way to construct task + static RefreshTaskHolder create( + const StorageMaterializedView & view, + ContextMutablePtr context, + const DB::ASTRefreshStrategy & strategy); + + void initialize(std::shared_ptr view); + + /// Enable task scheduling + void start(); + + /// Disable task scheduling + void stop(); + + /// Schedule task immediately + void run(); + + /// Cancel task execution + void cancel(); + + /// Pause task execution (must be either resumed or canceled later) + void pause(); + + /// Resume task execution + void 
resume(); + + /// Notify dependant task + void notify(const StorageID & parent_id); + +private: + enum class ExecutionResult : UInt8 + { + Finished, + Paused, + Cancelled + }; + + void doRefresh(); + + void scheduleRefresh(std::chrono::system_clock::time_point now); + + void refresh(); + + ExecutionResult executeRefresh(); + + void initializeRefresh(std::shared_ptr view); + + void completeRefresh(std::shared_ptr view); + + std::chrono::sys_seconds calculateRefreshTime(std::chrono::system_clock::time_point now) const; + + std::chrono::seconds genSpreadSeconds(); + + void progressCallback(const Progress & progress); + + auto makePoolTask() + { + return [self = this->weak_from_this()] + { + if (auto task = self.lock()) + task->doRefresh(); + }; + } + + std::shared_ptr lockView(); + + void storeState(TaskState task_state); + + void storeLastState(LastTaskState task_state); + + void storeLastRefresh(std::chrono::system_clock::time_point last); + + void storeNextRefresh(std::chrono::system_clock::time_point next); + + /// Task ownership + BackgroundSchedulePool::TaskHolder refresh_task; + std::weak_ptr view_to_refresh; + RefreshSet::Entry set_entry; + + /// Task execution + std::optional refresh_executor; + std::optional refresh_block; + std::shared_ptr refresh_query; + + /// Concurrent dependency management + RefreshAllCombiner combiner; + RefreshDependencies dependencies; + std::vector deps_entries; + + /// Refresh time settings and data + std::chrono::system_clock::time_point last_refresh; + std::chrono::system_clock::time_point next_refresh; + std::variant refresh_timer; + + /// Refresh time randomization + std::uniform_int_distribution refresh_spread; + + /// Task state + std::atomic state{TaskState::Disabled}; + LastTaskState last_state{LastTaskState::Unknown}; + + /// Outer triggers + std::atomic_bool refresh_immediately; + std::atomic_bool interrupt_execution; + std::atomic_bool canceled; +}; + +} diff --git a/src/Storages/MaterializedView/RefreshTask_fwd.h b/src/Storages/MaterializedView/RefreshTask_fwd.h new file mode 100644 index 00000000000..1f366962eb6 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshTask_fwd.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class RefreshTask; + +using RefreshTaskStateUnderlying = UInt8; +using RefreshTaskHolder = std::shared_ptr; +using RefreshTaskObserver = std::weak_ptr; + +} diff --git a/src/Storages/MaterializedView/RefreshTimers.cpp b/src/Storages/MaterializedView/RefreshTimers.cpp new file mode 100644 index 00000000000..973eba46057 --- /dev/null +++ b/src/Storages/MaterializedView/RefreshTimers.cpp @@ -0,0 +1,243 @@ +#include + +#include + +namespace DB +{ + +namespace +{ + constexpr std::chrono::days ZERO_DAYS{0}; + constexpr std::chrono::days ONE_DAY{1}; +} + +RefreshAfterTimer::RefreshAfterTimer(const ASTTimeInterval * time_interval) +{ + if (time_interval) + { + for (auto && [kind, value] : time_interval->kinds) + setWithKind(kind, value); + } +} + +std::chrono::sys_seconds RefreshAfterTimer::after(std::chrono::system_clock::time_point tp) const +{ + auto tp_date = std::chrono::floor(tp); + auto tp_time_offset = std::chrono::floor(tp - tp_date); + std::chrono::year_month_day ymd(tp_date); + ymd += years; + ymd += months; + std::chrono::sys_days date = ymd; + date += weeks; + date += days; + auto result = std::chrono::time_point_cast(date); + result += tp_time_offset; + result += hours; + result += minutes; + result += seconds; + return result; +} + +void RefreshAfterTimer::setWithKind(IntervalKind kind, 
UInt64 val) +{ + switch (kind) + { + case IntervalKind::Second: + seconds = std::chrono::seconds{val}; + break; + case IntervalKind::Minute: + minutes = std::chrono::minutes{val}; + break; + case IntervalKind::Hour: + hours = std::chrono::hours{val}; + break; + case IntervalKind::Day: + days = std::chrono::days{val}; + break; + case IntervalKind::Week: + weeks = std::chrono::weeks{val}; + break; + case IntervalKind::Month: + months = std::chrono::months{val}; + break; + case IntervalKind::Year: + years = std::chrono::years{val}; + break; + default: + break; + } +} + +RefreshEveryTimer::RefreshEveryTimer(const ASTTimePeriod & time_period, const ASTTimeInterval * time_offset) + : offset(time_offset) + , value{static_cast(time_period.value)} + , kind{time_period.kind} +{ + // TODO: validate invariants +} + +std::chrono::sys_seconds RefreshEveryTimer::next(std::chrono::system_clock::time_point tp) const +{ + if (value == 0) + return std::chrono::floor(tp); + switch (kind) + { + case IntervalKind::Second: + return alignedToSeconds(tp); + case IntervalKind::Minute: + return alignedToMinutes(tp); + case IntervalKind::Hour: + return alignedToHours(tp); + case IntervalKind::Day: + return alignedToDays(tp); + case IntervalKind::Week: + return alignedToWeeks(tp); + case IntervalKind::Month: + return alignedToMonths(tp); + case IntervalKind::Year: + return alignedToYears(tp); + default: + return std::chrono::ceil(tp); + } +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToYears(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto tp_days = std::chrono::floor(tp); + std::chrono::year_month_day tp_ymd(tp_days); + auto normalize_years = [](std::chrono::year year) -> std::chrono::sys_days + { + return year / std::chrono::January / 1d; + }; + + auto prev_years = normalize_years(tp_ymd.year()); + if (auto prev_time = offset.after(prev_years); prev_time > tp) + return prev_time; + + auto next_years = normalize_years(tp_ymd.year() + std::chrono::years{1}); + return offset.after(next_years); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToMonths(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto tp_days = std::chrono::floor(tp); + std::chrono::year_month_day tp_ymd(tp_days); + auto normalize_months = [](const std::chrono::year_month_day & ymd, unsigned month_value) -> std::chrono::sys_days + { + return ymd.year() / std::chrono::month{month_value} / 1d; + }; + + auto prev_month_value = static_cast(tp_ymd.month()) / value * value; + auto prev_months = normalize_months(tp_ymd, prev_month_value); + if (auto prev_time = offset.after(prev_months); prev_time > tp) + return prev_time; + + auto next_month_value = (static_cast(tp_ymd.month()) / value + 1) * value; + auto next_months = normalize_months(tp_ymd, next_month_value); + std::chrono::year_month_day next_ymd(next_months); + if (next_ymd.year() > tp_ymd.year()) + return offset.after(normalize_months(next_ymd, value)); + return offset.after(next_months); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToWeeks(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto cpp_weekday = offset.getDays() + ONE_DAY; + std::chrono::weekday offset_weekday((cpp_weekday - std::chrono::floor(cpp_weekday)).count()); + + auto tp_days = std::chrono::floor(tp); + std::chrono::year_month_weekday tp_ymd(tp_days); + auto normalize_weeks = [offset_weekday](const std::chrono::year_month_weekday & ymd, unsigned week_value) + 
{ + return std::chrono::sys_days(ymd.year() / ymd.month() / std::chrono::weekday{offset_weekday}[week_value]); + }; + + auto prev_week_value = tp_ymd.index() / value * value; + auto prev_days = normalize_weeks(tp_ymd, prev_week_value); + if (auto prev_time = offset.after(prev_days - offset.getDays()); prev_time > tp) + return prev_time; + + auto next_day_value = (tp_ymd.index() / value + 1) * value; + auto next_days = normalize_weeks(tp_ymd, next_day_value); + std::chrono::year_month_weekday next_ymd(next_days); + if (next_ymd.year() > tp_ymd.year() || next_ymd.month() > tp_ymd.month()) + return offset.after(normalize_weeks(next_ymd, value) - offset.getDays()); + return offset.after(next_days); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToDays(std::chrono::system_clock::time_point tp) const +{ + auto tp_days = std::chrono::floor(tp); + std::chrono::year_month_day tp_ymd(tp_days); + auto normalize_days = [](const std::chrono::year_month_day & ymd, unsigned day_value) -> std::chrono::sys_days + { + return ymd.year() / ymd.month() / std::chrono::day{day_value}; + }; + + auto prev_day_value = static_cast(tp_ymd.day()) / value * value; + auto prev_days = normalize_days(tp_ymd, prev_day_value); + if (auto prev_time = offset.after(prev_days); prev_time > tp) + return prev_time; + + auto next_day_value = (static_cast(tp_ymd.day()) / value + 1) * value; + auto next_days = normalize_days(tp_ymd, next_day_value); + std::chrono::year_month_day next_ymd(next_days); + if (next_ymd.year() > tp_ymd.year() || next_ymd.month() > tp_ymd.month()) + return offset.after(normalize_days(next_ymd, value)); + return offset.after(next_days); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToHours(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto tp_days = std::chrono::floor(tp); + auto tp_hours = std::chrono::floor(tp - tp_days); + + auto prev_hours = (tp_hours / value) * value; + if (auto prev_time = offset.after(tp_days + prev_hours); prev_time > tp) + return prev_time; + + auto next_hours = (tp_hours / value + 1h) * value; + if (std::chrono::floor(next_hours - 1h) > ZERO_DAYS) + return offset.after(tp_days + ONE_DAY + std::chrono::hours{value}); + return offset.after(tp_days + next_hours); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToMinutes(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto tp_hours = std::chrono::floor(tp); + auto tp_minutes = std::chrono::floor(tp - tp_hours); + + auto prev_minutes = (tp_minutes / value) * value; + if (auto prev_time = offset.after(tp_hours + prev_minutes); prev_time > tp) + return prev_time; + + auto next_minutes = (tp_minutes / value + 1min) * value; + if (std::chrono::floor(next_minutes - 1min) > 0h) + return offset.after(tp_hours + 1h + std::chrono::minutes{value}); + return offset.after(tp_hours + next_minutes); +} + +std::chrono::sys_seconds RefreshEveryTimer::alignedToSeconds(std::chrono::system_clock::time_point tp) const +{ + using namespace std::chrono_literals; + + auto tp_minutes = std::chrono::floor(tp); + auto tp_seconds = std::chrono::floor(tp - tp_minutes); + + auto next_seconds= (tp_seconds / value + 1s) * value; + if (std::chrono::floor(next_seconds - 1s) > 0min) + return tp_minutes + 1min + std::chrono::seconds{value}; + return tp_minutes + next_seconds; +} + +} diff --git a/src/Storages/MaterializedView/RefreshTimers.h b/src/Storages/MaterializedView/RefreshTimers.h new file mode 100644 index 00000000000..0672782a3f9 --- 
/dev/null +++ b/src/Storages/MaterializedView/RefreshTimers.h @@ -0,0 +1,69 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +class ASTTimeInterval; +class ASTTimePeriod; + +/// Schedule timer for MATERIALIZED VIEW ... REFRESH AFTER ... queries +class RefreshAfterTimer +{ +public: + explicit RefreshAfterTimer(const ASTTimeInterval * time_interval); + + std::chrono::sys_seconds after(std::chrono::system_clock::time_point tp) const; + + std::chrono::seconds getSeconds() const { return seconds; } + std::chrono::minutes getMinutes() const { return minutes; } + std::chrono::hours getHours() const { return hours; } + std::chrono::days getDays() const { return days; } + std::chrono::weeks getWeeks() const { return weeks; } + std::chrono::months getMonths() const { return months; } + std::chrono::years getYears() const { return years; } + +private: + void setWithKind(IntervalKind kind, UInt64 val); + + std::chrono::seconds seconds{0}; + std::chrono::minutes minutes{0}; + std::chrono::hours hours{0}; + std::chrono::days days{0}; + std::chrono::weeks weeks{0}; + std::chrono::months months{0}; + std::chrono::years years{0}; +}; + +/// Schedule timer for MATERIALIZED VIEW ... REFRESH EVERY ... queries +class RefreshEveryTimer +{ +public: + explicit RefreshEveryTimer(const ASTTimePeriod & time_period, const ASTTimeInterval * time_offset); + + std::chrono::sys_seconds next(std::chrono::system_clock::time_point tp) const; + +private: + std::chrono::sys_seconds alignedToYears(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToMonths(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToWeeks(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToDays(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToHours(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToMinutes(std::chrono::system_clock::time_point tp) const; + + std::chrono::sys_seconds alignedToSeconds(std::chrono::system_clock::time_point tp) const; + + RefreshAfterTimer offset; + UInt32 value{0}; + IntervalKind kind{IntervalKind::Second}; +}; + +} diff --git a/src/Storages/MaterializedView/tests/gtest_timers.cpp b/src/Storages/MaterializedView/tests/gtest_timers.cpp new file mode 100644 index 00000000000..2a9f30c57fa --- /dev/null +++ b/src/Storages/MaterializedView/tests/gtest_timers.cpp @@ -0,0 +1,27 @@ +#include + +#include +#include + +using namespace DB; + +TEST(Timers, AfterTimer) +{ + using namespace std::chrono; + + auto interval = std::make_shared(); + interval->kinds = { + {IntervalKind::Week, 2}, + {IntervalKind::Day, 3}, + {IntervalKind::Minute, 15}, + }; + RefreshAfterTimer timer(interval.get()); + + sys_days date_in = 2023y / January / 18d; + auto secs_in = date_in + 23h + 57min; + + sys_days date_out = 2023y / February / 5d; + auto secs_out = date_out + 0h + 12min; + + ASSERT_EQ(secs_out, timer.after(secs_in)); +} diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 2339fd11cf8..43e8e0d6f33 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -1,5 +1,7 @@ #include +#include + #include #include @@ -7,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +41,11 @@ namespace ErrorCodes extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; } +namespace ActionLocks +{ + extern const StorageActionBlockType ViewRefresh; +} 
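+ +/// A refresh materializes the view's SELECT into a ".tmp"-prefixed twin of the inner table and then atomically exchanges it with the target; see prepareRefreshQuery() and updateInnerTableAfterRefresh() below.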
+ static inline String generateInnerTableName(const StorageID & view_id) { if (view_id.hasUUID()) @@ -126,6 +134,12 @@ StorageMaterializedView::StorageMaterializedView( target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->getDatabase(), manual_create_query->getTable()}, getContext())->getStorageID(); } + + if (query.refresh_strategy) + refresher = RefreshTask::create( + *this, + getContext(), + *query.refresh_strategy); } QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( @@ -236,23 +250,24 @@ void StorageMaterializedView::dropInnerTableIfAny(bool sync, ContextPtr local_co /// See the comment in StorageMaterializedView::drop. /// DDL queries with StorageMaterializedView are fundamentally broken. /// Best-effort to make them work: the inner table name is almost always less than the MV name (so it's safe to lock DDLGuard) - bool may_lock_ddl_guard = getStorageID().getQualifiedName() < target_table_id.getQualifiedName(); + auto inner_table_id = getTargetTableId(); + bool may_lock_ddl_guard = getStorageID().getQualifiedName() < inner_table_id.getQualifiedName(); if (has_inner_table && tryGetTargetTable()) - InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, target_table_id, + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, getContext(), local_context, inner_table_id, sync, /* ignore_sync_setting */ true, may_lock_ddl_guard); } void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { if (has_inner_table) - InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Truncate, getContext(), local_context, target_table_id, true); + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Truncate, getContext(), local_context, getTargetTableId(), true); } void StorageMaterializedView::checkStatementCanBeForwarded() const { if (!has_inner_table) throw Exception(ErrorCodes::INCORRECT_QUERY, "MATERIALIZED VIEW targets existing table {}. 
" - "Execute the statement directly on it.", target_table_id.getNameForLogs()); + "Execute the statement directly on it.", getTargetTableId().getNameForLogs()); } bool StorageMaterializedView::optimize( @@ -270,6 +285,48 @@ bool StorageMaterializedView::optimize( return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } +std::shared_ptr StorageMaterializedView::prepareRefreshQuery() +{ + auto inner_table_id = getTargetTableId(); + auto new_table_name = ".tmp" + generateInnerTableName(getStorageID()); + + auto db = DatabaseCatalog::instance().getDatabase(inner_table_id.database_name); + + auto create_table_query = db->getCreateTableQuery(inner_table_id.table_name, getContext()); + auto & create_query = create_table_query->as(); + create_query.setTable(new_table_name); + create_query.setDatabase(db->getDatabaseName()); + create_query.create_or_replace = true; + create_query.replace_table = true; + create_query.uuid = UUIDHelpers::Nil; + + auto create_ctx = Context::createCopy(getContext()); + InterpreterCreateQuery create_interpreter(create_table_query, create_ctx); + create_interpreter.setInternal(true); + create_interpreter.execute(); + + auto insert_query = std::make_shared(); + insert_query->setTable(new_table_name); + insert_query->setDatabase(db->getDatabaseName()); + insert_query->select = getInMemoryMetadataPtr()->getSelectQuery().select_query; + + return insert_query; +} + +void StorageMaterializedView::updateInnerTableAfterRefresh(std::shared_ptr refresh_query) +{ + auto inner_table_id = getTargetTableId(); + + auto db = DatabaseCatalog::instance().getDatabase(inner_table_id.database_name); + auto target_db = DatabaseCatalog::instance().getDatabase(refresh_query->getDatabase()); + + auto rename_ctx = Context::createCopy(getContext()); + target_db->renameTable( + rename_ctx, refresh_query->getTable(), *db, inner_table_id.table_name, /*exchange=*/true, /*dictionary=*/false); + + setTargetTableId(db->getTable(refresh_query->getTable(), getContext())->getStorageID()); +} + void StorageMaterializedView::alter( const AlterCommands & params, ContextPtr local_context, @@ -332,6 +389,7 @@ void StorageMaterializedView::mutate(const MutationCommands & commands, ContextP void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) { auto old_table_id = getStorageID(); + auto inner_table_id = getTargetTableId(); auto metadata_snapshot = getInMemoryMetadataPtr(); bool from_atomic_to_atomic_database = old_table_id.hasUUID() && new_table_id.hasUUID(); @@ -340,14 +398,14 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) auto new_target_table_name = generateInnerTableName(new_table_id); auto rename = std::make_shared(); - assert(target_table_id.database_name == old_table_id.database_name); + assert(inner_table_id.database_name == old_table_id.database_name); ASTRenameQuery::Element elem { ASTRenameQuery::Table { - target_table_id.database_name.empty() ? nullptr : std::make_shared(target_table_id.database_name), - std::make_shared(target_table_id.table_name) + inner_table_id.database_name.empty() ? 
nullptr : std::make_shared(inner_table_id.database_name), + std::make_shared(inner_table_id.table_name) }, ASTRenameQuery::Table { @@ -358,15 +416,14 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) rename->elements.emplace_back(std::move(elem)); InterpreterRenameQuery(rename, getContext()).execute(); - target_table_id.database_name = new_table_id.database_name; - target_table_id.table_name = new_target_table_name; + updateTargetTableId(new_table_id.database_name, new_target_table_name); } IStorage::renameInMemory(new_table_id); if (from_atomic_to_atomic_database && has_inner_table) { - assert(target_table_id.database_name == old_table_id.database_name); - target_table_id.database_name = new_table_id.database_name; + assert(inner_table_id.database_name == old_table_id.database_name); + updateTargetTableId(new_table_id.database_name, std::nullopt); } const auto & select_query = metadata_snapshot->getSelectQuery(); // TODO Actually we don't need to update dependency if MV has UUID, but then db and table name will be outdated @@ -379,10 +436,19 @@ void StorageMaterializedView::startup() const auto & select_query = metadata_snapshot->getSelectQuery(); if (!select_query.select_table_id.empty()) DatabaseCatalog::instance().addViewDependency(select_query.select_table_id, getStorageID()); + + if (refresher) + { + refresher->initialize(std::static_pointer_cast(shared_from_this())); + refresher->start(); + } } void StorageMaterializedView::shutdown(bool) { + if (refresher) + refresher->stop(); + auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & select_query = metadata_snapshot->getSelectQuery(); /// Make sure the dependency is removed after DETACH TABLE @@ -393,13 +459,13 @@ void StorageMaterializedView::shutdown(bool) StoragePtr StorageMaterializedView::getTargetTable() const { checkStackSize(); - return DatabaseCatalog::instance().getTable(target_table_id, getContext()); + return DatabaseCatalog::instance().getTable(getTargetTableId(), getContext()); } StoragePtr StorageMaterializedView::tryGetTargetTable() const { checkStackSize(); - return DatabaseCatalog::instance().tryGetTable(target_table_id, getContext()); + return DatabaseCatalog::instance().tryGetTable(getTargetTableId(), getContext()); } NamesAndTypesList StorageMaterializedView::getVirtuals() const @@ -472,6 +538,8 @@ std::optional StorageMaterializedView::totalBytesUncompressed(const Sett ActionLock StorageMaterializedView::getActionLock(StorageActionBlockType type) { + if (type == ActionLocks::ViewRefresh && refresher) + refresher->stop(); if (has_inner_table) { if (auto target_table = tryGetTargetTable()) @@ -487,6 +555,34 @@ bool StorageMaterializedView::isRemote() const return false; } +void StorageMaterializedView::onActionLockRemove(StorageActionBlockType action_type) +{ + if (action_type == ActionLocks::ViewRefresh && refresher) + refresher->start(); + /// TODO: Do we need to release action lock on inner table? 
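// For context, the start/stop pair above is driven by the generic action-lock
// machinery: SYSTEM STOP VIEW collects a lock of the ViewRefresh block type via
// getActionLock(), which this patch wires to refresher->stop(), and SYSTEM START
// VIEW releases it and calls back into onActionLockRemove(), which restarts the
// refresher. A sketch of the sequence as seen from the storage (illustrative,
// using only the two hooks defined in this file):
//
//     // SYSTEM STOP VIEW db.mv
//     ActionLock lock = storage.getActionLock(ActionLocks::ViewRefresh);  // refresher->stop()
//     // SYSTEM START VIEW db.mv
//     storage.onActionLockRemove(ActionLocks::ViewRefresh);               // refresher->start()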
+} + +DB::StorageID StorageMaterializedView::getTargetTableId() const +{ + std::lock_guard guard(target_table_id_mutex); + return target_table_id; +} + +void StorageMaterializedView::setTargetTableId(DB::StorageID id) +{ + std::lock_guard guard(target_table_id_mutex); + target_table_id = std::move(id); +} + +void StorageMaterializedView::updateTargetTableId(std::optional database_name, std::optional table_name) +{ + std::lock_guard guard(target_table_id_mutex); + if (database_name) + target_table_id.database_name = *std::move(database_name); + if (table_name) + target_table_id.table_name = *std::move(table_name); +} + void registerStorageMaterializedView(StorageFactory & factory) { factory.registerStorage("MaterializedView", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 9ddcc458f3e..2dce8355b47 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -5,6 +5,7 @@ #include #include +#include namespace DB { @@ -83,6 +84,7 @@ public: NamesAndTypesList getVirtuals() const override; ActionLock getActionLock(StorageActionBlockType type) override; + void onActionLockRemove(StorageActionBlockType action_type) override; void read( QueryPlan & query_plan, @@ -105,12 +107,25 @@ public: std::optional totalBytesUncompressed(const Settings & settings) const override; private: + mutable std::mutex target_table_id_mutex; /// Will be initialized in constructor StorageID target_table_id = StorageID::createEmpty(); + RefreshTaskHolder refresher; + bool has_inner_table = false; + friend class RefreshTask; + void checkStatementCanBeForwarded() const; + + std::shared_ptr prepareRefreshQuery(); + + void updateInnerTableAfterRefresh(std::shared_ptr refresh_query); + + StorageID getTargetTableId() const; + void setTargetTableId(StorageID id); + void updateTargetTableId(std::optional database_name, std::optional table_name); }; } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp new file mode 100644 index 00000000000..0ddf8a48c9d --- /dev/null +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -0,0 +1,67 @@ +#include + +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemViewRefreshes::getNamesAndTypes() +{ + return { + {"database", std::make_shared()}, + {"view", std::make_shared()}, + {"refresh_status", std::make_shared()}, + {"last_refresh_status", std::make_shared()}, + {"last_refresh_time", std::make_shared()}, + {"next_refresh_time", std::make_shared()}, + {"progress", std::make_shared()}, + {"elapsed", std::make_shared()}, + {"read_rows", std::make_shared()}, + {"read_bytes", std::make_shared()}, + {"total_rows", std::make_shared()}, + {"total_bytes", std::make_shared()}, + {"written_rows", std::make_shared()}, + {"written_bytes", std::make_shared()}, + {"result_rows", std::make_shared()}, + {"result_bytes", std::make_shared()}, + }; +} + +void StorageSystemViewRefreshes::fillData( + MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + auto access = context->getAccess(); + // TODO: Do we need to add new access type? 
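// For now visibility piggybacks on SHOW_TABLES, i.e. anyone allowed to see the
// view may also see its refresh info. A dedicated type would be one more M(...)
// entry in src/Access/Common/AccessType.h, e.g. the purely hypothetical
//
//     M(SHOW_VIEW_REFRESHES, "", TABLE, SHOW) \
//
// checked below in place of SHOW_TABLES.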
+ auto valid_access = AccessType::SHOW_TABLES; + bool check_access_for_tables = !access->isGranted(valid_access); + + + for (const auto & refresh : context->getRefreshSet().getInfo()) + { + if (check_access_for_tables && !access->isGranted(valid_access, refresh.database, refresh.view_name)) + continue; + + std::size_t i = 0; + res_columns[i++]->insert(refresh.database); + res_columns[i++]->insert(refresh.view_name); + res_columns[i++]->insert(refresh.refresh_status); + res_columns[i++]->insert(refresh.last_refresh_status); + res_columns[i++]->insert(refresh.last_refresh_time); + res_columns[i++]->insert(refresh.next_refresh_time); + res_columns[i++]->insert(refresh.progress); + res_columns[i++]->insert(refresh.elapsed_ns); + res_columns[i++]->insert(refresh.read_rows); + res_columns[i++]->insert(refresh.read_bytes); + res_columns[i++]->insert(refresh.total_rows_to_read); + res_columns[i++]->insert(refresh.total_bytes_to_read); + res_columns[i++]->insert(refresh.written_rows); + res_columns[i++]->insert(refresh.written_bytes); + res_columns[i++]->insert(refresh.result_rows); + res_columns[i++]->insert(refresh.result_bytes); + } +} + +} diff --git a/src/Storages/System/StorageSystemViewRefreshes.h b/src/Storages/System/StorageSystemViewRefreshes.h new file mode 100644 index 00000000000..475ad45e68f --- /dev/null +++ b/src/Storages/System/StorageSystemViewRefreshes.h @@ -0,0 +1,27 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + + +class StorageSystemViewRefreshes final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemViewRefreshes"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} + diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ffe74f1c94a..b907b97f0dd 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -87,6 +87,7 @@ #include #include #include +#include #if defined(__ELF__) && !defined(OS_FREEBSD) #include @@ -209,6 +210,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "jemalloc_bins"); attach(context, system_database, "s3queue"); attach(context, system_database, "dashboards"); + attach(context, system_database, "view_refreshes"); if (has_zookeeper) { From ba766edb530d481a9ed50aee36f4b46aa9b1d23b Mon Sep 17 00:00:00 2001 From: koloshmet Date: Mon, 13 Feb 2023 04:18:01 +0200 Subject: [PATCH 56/88] refreshable view query test --- .../0_stateless/02661_refreshable_materialized_views.reference | 1 + .../0_stateless/02661_refreshable_materialized_views.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02661_refreshable_materialized_views.reference create mode 100644 tests/queries/0_stateless/02661_refreshable_materialized_views.sql diff --git a/tests/queries/0_stateless/02661_refreshable_materialized_views.reference b/tests/queries/0_stateless/02661_refreshable_materialized_views.reference new file mode 100644 index 00000000000..663d951b8b5 --- /dev/null +++ b/tests/queries/0_stateless/02661_refreshable_materialized_views.reference @@ -0,0 +1 @@ +Scheduled Unknown diff --git a/tests/queries/0_stateless/02661_refreshable_materialized_views.sql 
b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql new file mode 100644 index 00000000000..0b8a6a5289e --- /dev/null +++ b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql @@ -0,0 +1,3 @@ +CREATE MATERIALIZED VIEW test REFRESH AFTER 15 SECOND ENGINE = MergeTree() ORDER BY number AS SELECT * FROM system.numbers LIMIT 10; + +SELECT refresh_status, last_refresh_status FROM system.view_refreshes WHERE view = 'test'; From 3556c15c51fa143d4590c936edea15e0b434f009 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Mon, 13 Feb 2023 13:23:34 +0200 Subject: [PATCH 57/88] fixed tests --- src/Access/Common/AccessType.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 1f0e678461f..0188cbb5b99 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -177,7 +177,7 @@ enum class AccessType M(SYSTEM_MOVES, "SYSTEM STOP MOVES, SYSTEM START MOVES, STOP MOVES, START MOVES", TABLE, SYSTEM) \ M(SYSTEM_PULLING_REPLICATION_LOG, "SYSTEM STOP PULLING REPLICATION LOG, SYSTEM START PULLING REPLICATION LOG", TABLE, SYSTEM) \ M(SYSTEM_CLEANUP, "SYSTEM STOP CLEANUP, SYSTEM START CLEANUP", TABLE, SYSTEM) \ - M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, START VIEWS, STOP VIEWS, CANCEL VIEW, PAUSE VIEW, RESUME VIEW", VIEW, SYSTEM) \ + M(SYSTEM_VIEWS, "SYSTEM REFRESH VIEW, SYSTEM START VIEWS, SYSTEM STOP VIEWS, SYSTEM START VIEW, SYSTEM STOP VIEW, SYSTEM CANCEL VIEW, SYSTEM PAUSE VIEW, SYSTEM RESUME VIEW, REFRESH VIEW, START VIEWS, STOP VIEWS, START VIEW, STOP VIEW, CANCEL VIEW, PAUSE VIEW, RESUME VIEW", VIEW, SYSTEM) \ M(SYSTEM_DISTRIBUTED_SENDS, "SYSTEM STOP DISTRIBUTED SENDS, SYSTEM START DISTRIBUTED SENDS, STOP DISTRIBUTED SENDS, START DISTRIBUTED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ From d1932763f38f4740a4f67aa78039a07c8ceb0545 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Mon, 13 Feb 2023 13:41:10 +0200 Subject: [PATCH 58/88] fixed style --- src/Parsers/ASTTimeInterval.h | 2 +- src/Storages/MaterializedView/RefreshAllCombiner.h | 2 +- src/Storages/MaterializedView/RefreshDependencies.h | 2 +- src/Storages/MaterializedView/RefreshSet.cpp | 5 +++++ src/Storages/MaterializedView/RefreshSet.h | 9 ++------- src/Storages/MaterializedView/RefreshTask.cpp | 2 +- src/Storages/MaterializedView/RefreshTask.h | 2 +- src/Storages/System/StorageSystemViewRefreshes.cpp | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Parsers/ASTTimeInterval.h b/src/Parsers/ASTTimeInterval.h index 374d1e1ec55..a8f2518d180 100644 --- a/src/Parsers/ASTTimeInterval.h +++ b/src/Parsers/ASTTimeInterval.h @@ -1,4 +1,4 @@ - #pragma once +#pragma once #include diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.h b/src/Storages/MaterializedView/RefreshAllCombiner.h index f4faf073ae4..3b74018f6cf 100644 --- a/src/Storages/MaterializedView/RefreshAllCombiner.h +++ b/src/Storages/MaterializedView/RefreshAllCombiner.h @@ -6,7 +6,7 @@ namespace DB { /// Concurrent primitive for dependency completeness registration -/// When arrive methods return true, dependant task must be executed (or scheduled) +/// When arrive methods return true, dependent task must be executed (or scheduled) class RefreshAllCombiner { public: diff --git 
a/src/Storages/MaterializedView/RefreshDependencies.h b/src/Storages/MaterializedView/RefreshDependencies.h index a4488053adf..8d370f96d40 100644 --- a/src/Storages/MaterializedView/RefreshDependencies.h +++ b/src/Storages/MaterializedView/RefreshDependencies.h @@ -12,7 +12,7 @@ namespace DB class RefreshTask; -/// Concurrent primitive for managing list of dependant task and notifying them +/// Concurrent primitive for managing list of dependent task and notifying them class RefreshDependencies { using Container = std::list; diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index c38d3408495..8cae074d4a3 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -1,6 +1,11 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric Refresh; +} + namespace DB { diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index 48962d8c96d..255fa4dbd66 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -6,15 +6,10 @@ #include -namespace CurrentMetrics -{ - extern const Metric Refresh; -} - -namespace DB +namespace DB { -struct RefreshInfo +struct RefreshInfo { String database; String view_name; diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 579d3252865..163ab3362eb 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -7,7 +7,7 @@ #include #include -namespace DB +namespace DB { namespace ErrorCodes diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 5e9bb618372..ea289562c7e 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -66,7 +66,7 @@ public: /// Resume task execution void resume(); - /// Notify dependant task + /// Notify dependent task void notify(const StorageID & parent_id); private: diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 0ddf8a48c9d..e78059e1a88 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -38,7 +38,7 @@ void StorageSystemViewRefreshes::fillData( auto valid_access = AccessType::SHOW_TABLES; bool check_access_for_tables = !access->isGranted(valid_access); - + for (const auto & refresh : context->getRefreshSet().getInfo()) { if (check_access_for_tables && !access->isGranted(valid_access, refresh.database, refresh.view_name)) From f14114dafc3559ab80f146e1c476a48622b8ccf4 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Wed, 15 Feb 2023 04:58:26 +0200 Subject: [PATCH 59/88] proper tmp table cleanup --- src/Storages/MaterializedView/RefreshTask.cpp | 19 ++++++++++++-- src/Storages/MaterializedView/RefreshTask.h | 4 ++- src/Storages/StorageMaterializedView.cpp | 25 +++++++++++-------- src/Storages/StorageMaterializedView.h | 6 ++--- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 163ab3362eb..fdf3948de70 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -168,12 +169,14 @@ void RefreshTask::refresh() storeLastState(LastTaskState::Finished); 
break; case ExecutionResult::Cancelled: + cancelRefresh(view); storeLastState(LastTaskState::Canceled); break; } refresh_executor.reset(); refresh_block.reset(); + refresh_query.reset(); storeLastRefresh(std::chrono::system_clock::now()); scheduleRefresh(last_refresh); @@ -193,9 +196,12 @@ RefreshTask::ExecutionResult RefreshTask::executeRefresh() } -void RefreshTask::initializeRefresh(std::shared_ptr view) +void RefreshTask::initializeRefresh(std::shared_ptr view) { + auto fresh_table = view->createFreshTable(); refresh_query = view->prepareRefreshQuery(); + refresh_query->setTable(fresh_table.table_name); + refresh_query->setDatabase(fresh_table.database_name); auto refresh_context = Context::createCopy(view->getContext()); refresh_block = InterpreterInsertQuery(refresh_query, refresh_context).execute(); refresh_block->pipeline.setProgressCallback([this](const Progress & progress){ progressCallback(progress); }); @@ -208,8 +214,17 @@ void RefreshTask::initializeRefresh(std::shared_ptr vie void RefreshTask::completeRefresh(std::shared_ptr view) { - view->updateInnerTableAfterRefresh(refresh_query); + auto stale_table = view->exchangeTargetTable(refresh_query->table_id); dependencies.notifyAll(view->getStorageID()); + + auto drop_context = Context::createCopy(view->getContext()); + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, stale_table, /*sync=*/true); +} + +void RefreshTask::cancelRefresh(std::shared_ptr view) +{ + auto drop_context = Context::createCopy(view->getContext()); + InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, refresh_query->table_id, /*sync=*/true); } void RefreshTask::scheduleRefresh(std::chrono::system_clock::time_point now) diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index ea289562c7e..36c03e2c7df 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -85,10 +85,12 @@ private: ExecutionResult executeRefresh(); - void initializeRefresh(std::shared_ptr view); + void initializeRefresh(std::shared_ptr view); void completeRefresh(std::shared_ptr view); + void cancelRefresh(std::shared_ptr view); + std::chrono::sys_seconds calculateRefreshTime(std::chrono::system_clock::time_point now) const; std::chrono::seconds genSpreadSeconds(); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 43e8e0d6f33..42191bde90e 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -285,7 +285,7 @@ bool StorageMaterializedView::optimize( return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } -std::shared_ptr StorageMaterializedView::prepareRefreshQuery() +StorageID StorageMaterializedView::createFreshTable() const { auto inner_table_id = getTargetTableId(); auto new_table_name = ".tmp" + generateInnerTableName(getStorageID()); @@ -305,26 +305,29 @@ std::shared_ptr StorageMaterializedView::prepareRefreshQuery() create_interpreter.setInternal(true); create_interpreter.execute(); - auto insert_query = std::make_shared(); - insert_query->setTable(new_table_name); - insert_query->setDatabase(db->getDatabaseName()); - insert_query->select = getInMemoryMetadataPtr()->getSelectQuery().select_query; + return DatabaseCatalog::instance().getTable({create_query.getDatabase(), create_query.getTable()}, 
getContext())->getStorageID(); +} +std::shared_ptr StorageMaterializedView::prepareRefreshQuery() const +{ + auto insert_query = std::make_shared(); + insert_query->select = getInMemoryMetadataPtr()->getSelectQuery().select_query; return insert_query; } -void StorageMaterializedView::updateInnerTableAfterRefresh(std::shared_ptr refresh_query) +StorageID StorageMaterializedView::exchangeTargetTable(const StorageID & fresh_table) { - auto inner_table_id = getTargetTableId(); + auto stale_table_id = getTargetTableId(); - auto db = DatabaseCatalog::instance().getDatabase(inner_table_id.database_name); - auto target_db = DatabaseCatalog::instance().getDatabase(refresh_query->getDatabase()); + auto db = DatabaseCatalog::instance().getDatabase(stale_table_id.database_name); + auto target_db = DatabaseCatalog::instance().getDatabase(fresh_table.database_name); auto rename_ctx = Context::createCopy(getContext()); target_db->renameTable( - rename_ctx, refresh_query->getTable(), *db, inner_table_id.table_name, /*exchange=*/true, /*dictionary=*/false); + rename_ctx, fresh_table.table_name, *db, stale_table_id.table_name, /*exchange=*/true, /*dictionary=*/false); - setTargetTableId(db->getTable(refresh_query->getTable(), getContext())->getStorageID()); + setTargetTableId(fresh_table); + return stale_table_id; } void StorageMaterializedView::alter( diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 2dce8355b47..f3b777d34fa 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -119,9 +119,9 @@ private: void checkStatementCanBeForwarded() const; - std::shared_ptr prepareRefreshQuery(); - - void updateInnerTableAfterRefresh(std::shared_ptr refresh_query); + StorageID createFreshTable() const; + std::shared_ptr prepareRefreshQuery() const; + StorageID exchangeTargetTable(const StorageID & fresh_table); StorageID getTargetTableId() const; void setTargetTableId(StorageID id); From f1161566b46757492f01b995ae047fabd4810bcb Mon Sep 17 00:00:00 2001 From: koloshmet Date: Wed, 15 Feb 2023 07:04:13 +0200 Subject: [PATCH 60/88] proper tmp table cleanup --- src/Storages/MaterializedView/RefreshTask.cpp | 8 +++++--- src/Storages/StorageMaterializedView.cpp | 6 ++++-- src/Storages/StorageMaterializedView.h | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index fdf3948de70..fa5b3df75a2 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -188,6 +188,11 @@ RefreshTask::ExecutionResult RefreshTask::executeRefresh() while (!interrupt_execution.load() && not_finished) not_finished = refresh_executor->executeStep(interrupt_execution); + auto defer = make_scope_guard([this] + { + canceled.store(false); + interrupt_execution.store(false); + }); if (!not_finished) return ExecutionResult::Finished; if (interrupt_execution.load() && !canceled.load()) @@ -206,9 +211,6 @@ void RefreshTask::initializeRefresh(std::shared_ptrpipeline.setProgressCallback([this](const Progress & progress){ progressCallback(progress); }); - canceled.store(false); - interrupt_execution.store(false); - refresh_executor.emplace(refresh_block->pipeline); } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 42191bde90e..0939ddcd91a 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -315,7 
+315,7 @@ std::shared_ptr StorageMaterializedView::prepareRefreshQuery() c return insert_query; } -StorageID StorageMaterializedView::exchangeTargetTable(const StorageID & fresh_table) +StorageID StorageMaterializedView::exchangeTargetTable(StorageID fresh_table) { auto stale_table_id = getTargetTableId(); @@ -326,7 +326,9 @@ StorageID StorageMaterializedView::exchangeTargetTable(const StorageID & fresh_t target_db->renameTable( rename_ctx, fresh_table.table_name, *db, stale_table_id.table_name, /*exchange=*/true, /*dictionary=*/false); - setTargetTableId(fresh_table); + std::swap(stale_table_id.database_name, fresh_table.database_name); + std::swap(stale_table_id.table_name, fresh_table.table_name); + setTargetTableId(std::move(fresh_table)); return stale_table_id; } diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index f3b777d34fa..ff7b50340c7 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -121,7 +121,7 @@ private: StorageID createFreshTable() const; std::shared_ptr prepareRefreshQuery() const; - StorageID exchangeTargetTable(const StorageID & fresh_table); + StorageID exchangeTargetTable(StorageID fresh_table); StorageID getTargetTableId() const; void setTargetTableId(StorageID id); From 808cb0fa0514e1c4b624961a46c406b873609e68 Mon Sep 17 00:00:00 2001 From: koloshmet Date: Wed, 22 Feb 2023 05:01:21 +0200 Subject: [PATCH 61/88] fix fix fix --- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- src/Parsers/ParserRefreshStrategy.cpp | 12 +-- src/Storages/MaterializedView/RefreshSet.cpp | 1 - src/Storages/MaterializedView/RefreshTask.cpp | 89 ++++++++++++++----- src/Storages/MaterializedView/RefreshTask.h | 19 +++- src/Storages/StorageMaterializedView.cpp | 2 +- .../integration/test_grant_and_revoke/test.py | 2 +- ...1_refreshable_materialized_views.reference | 8 +- .../02661_refreshable_materialized_views.sql | 21 ++++- 9 files changed, 119 insertions(+), 37 deletions(-) diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index b5a15513a89..47c01d66570 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -51,7 +51,7 @@ TEST(AccessRights, Union) "CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, " "TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, " "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " - "SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " + "SYSTEM MOVES, SYSTEM VIEWS, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/src/Parsers/ParserRefreshStrategy.cpp b/src/Parsers/ParserRefreshStrategy.cpp index a448556bd4f..a6fbb373ed3 100644 --- a/src/Parsers/ParserRefreshStrategy.cpp +++ b/src/Parsers/ParserRefreshStrategy.cpp @@ -28,14 +28,16 @@ bool ParserRefreshStrategy::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { refresh->schedule_kind = ASTRefreshStrategy::ScheduleKind::EVERY; ASTPtr period; - ASTPtr periodic_offset; if (!ParserTimePeriod{}.parse(pos, period, expected)) return false; - if (!ParserTimeInterval{}.parse(pos, 
periodic_offset, expected)) - return false; - refresh->set(refresh->period, period); - refresh->set(refresh->periodic_offset, periodic_offset); + if (ParserKeyword{"OFFSET"}.ignore(pos, expected)) + { + ASTPtr periodic_offset; + if (!ParserTimeInterval{}.parse(pos, periodic_offset, expected)) + return false; + refresh->set(refresh->periodic_offset, periodic_offset); + } } if (refresh->schedule_kind == ASTRefreshStrategy::ScheduleKind::UNKNOWN) return false; diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index 8cae074d4a3..528375bb951 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -68,7 +68,6 @@ bool RefreshSetLess::operator()(const StorageID & l, const StorageID & r) const RefreshSet::Entry::Entry() : parent_set{nullptr} - , metric_increment{} {} RefreshSet::Entry::Entry(Entry && other) noexcept diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index fa5b3df75a2..84d5bbfb84e 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -47,9 +47,9 @@ RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) : refresh_timer(makeRefreshTimer(strategy)) , refresh_spread{makeSpreadDistribution(strategy.spread)} + , canceled{false} , refresh_immediately{false} , interrupt_execution{false} - , canceled{false} {} RefreshTaskHolder RefreshTask::create( @@ -92,7 +92,7 @@ void RefreshTask::start() void RefreshTask::stop() { refresh_task->deactivate(); - cancel(); + cancelSync(); storeState(TaskState::Disabled); } @@ -104,20 +104,36 @@ void RefreshTask::run() void RefreshTask::cancel() { - canceled.store(true); - interrupt_execution.store(true); + std::lock_guard guard(state_mutex); + cancelLocked(); +} + +void RefreshTask::cancelSync() +{ + std::unique_lock lock(state_mutex); + cancelLocked(); + sync_canceled.wait(lock, [this] { return !canceled; }); } void RefreshTask::pause() { - interrupt_execution.store(true); + std::lock_guard guard(state_mutex); + if (state == TaskState::Running) + { + interrupt_execution.store(true); + state = TaskState::Paused; + } } void RefreshTask::resume() { - interrupt_execution.store(false); - refresh_immediately.store(true); - refresh_task->schedule(); + std::lock_guard guard(state_mutex); + if (state == TaskState::Paused) + { + refresh_immediately.store(true); + refresh_task->schedule(); + state = TaskState::Scheduled; + } } void RefreshTask::notify(const StorageID & parent_id) @@ -154,15 +170,17 @@ void RefreshTask::refresh() if (!view) return; + std::unique_lock lock(state_mutex); + if (!refresh_executor) initializeRefresh(view); storeState(TaskState::Running); - switch (executeRefresh()) + switch (executeRefresh(lock)) { case ExecutionResult::Paused: - storeState(TaskState::Paused); + pauseRefresh(view); return; case ExecutionResult::Finished: completeRefresh(view); @@ -174,28 +192,24 @@ void RefreshTask::refresh() break; } - refresh_executor.reset(); - refresh_block.reset(); - refresh_query.reset(); + cleanState(); storeLastRefresh(std::chrono::system_clock::now()); scheduleRefresh(last_refresh); } -RefreshTask::ExecutionResult RefreshTask::executeRefresh() +RefreshTask::ExecutionResult RefreshTask::executeRefresh(std::unique_lock & state_lock) { + state_lock.unlock(); + bool not_finished{true}; while (!interrupt_execution.load() && not_finished) not_finished = refresh_executor->executeStep(interrupt_execution); - auto defer = 
make_scope_guard([this] - { - canceled.store(false); - interrupt_execution.store(false); - }); + state_lock.lock(); if (!not_finished) return ExecutionResult::Finished; - if (interrupt_execution.load() && !canceled.load()) + if (interrupt_execution.load() && !canceled) return ExecutionResult::Paused; return ExecutionResult::Cancelled; @@ -227,6 +241,14 @@ void RefreshTask::cancelRefresh(std::shared_ptr v { auto drop_context = Context::createCopy(view->getContext()); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, refresh_query->table_id, /*sync=*/true); + interrupt_execution.store(false); + if (std::exchange(canceled, false)) + sync_canceled.notify_all(); +} + +void RefreshTask::pauseRefresh(std::shared_ptr /*view*/) +{ + interrupt_execution.store(false); } void RefreshTask::scheduleRefresh(std::chrono::system_clock::time_point now) @@ -275,6 +297,31 @@ void RefreshTask::progressCallback(const Progress & progress) set_entry->elapsed_ns.store(progress.elapsed_ns, std::memory_order_relaxed); } +void RefreshTask::cancelLocked() +{ + switch (state) + { + case TaskState::Running: + canceled = true; + interrupt_execution.store(true); + break; + case TaskState::Paused: + if (auto view = lockView()) + cancelRefresh(view); + cleanState(); + break; + default: + break; + } +} + +void RefreshTask::cleanState() +{ + refresh_executor.reset(); + refresh_block.reset(); + refresh_query.reset(); +} + std::shared_ptr RefreshTask::lockView() { return std::static_pointer_cast(view_to_refresh.lock()); @@ -282,7 +329,7 @@ std::shared_ptr RefreshTask::lockView() void RefreshTask::storeState(TaskState task_state) { - state.store(task_state); + state = task_state; set_entry->state.store(static_cast(task_state)); } diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 36c03e2c7df..894b7f2fea0 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -38,7 +38,7 @@ public: }; /// Never call it manual, public for shared_ptr construction only - RefreshTask(const ASTRefreshStrategy & strategy); + explicit RefreshTask(const ASTRefreshStrategy & strategy); /// The only proper way to construct task static RefreshTaskHolder create( @@ -60,6 +60,9 @@ public: /// Cancel task execution void cancel(); + /// Cancel task execution synchronously + void cancelSync(); + /// Pause task execution (must be either resumed or canceled later) void pause(); @@ -83,7 +86,7 @@ private: void refresh(); - ExecutionResult executeRefresh(); + ExecutionResult executeRefresh(std::unique_lock & state_lock); void initializeRefresh(std::shared_ptr view); @@ -91,6 +94,8 @@ private: void cancelRefresh(std::shared_ptr view); + void pauseRefresh(std::shared_ptr view); + std::chrono::sys_seconds calculateRefreshTime(std::chrono::system_clock::time_point now) const; std::chrono::seconds genSpreadSeconds(); @@ -106,6 +111,10 @@ private: }; } + void cancelLocked(); + + void cleanState(); + std::shared_ptr lockView(); void storeState(TaskState task_state); @@ -140,13 +149,15 @@ private: std::uniform_int_distribution refresh_spread; /// Task state - std::atomic state{TaskState::Disabled}; + std::mutex state_mutex; + std::condition_variable sync_canceled; + TaskState state{TaskState::Disabled}; LastTaskState last_state{LastTaskState::Unknown}; + bool canceled; /// Outer triggers std::atomic_bool refresh_immediately; std::atomic_bool interrupt_execution; - std::atomic_bool canceled; }; } diff --git 
a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 0939ddcd91a..9cb21bdb06f 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -282,7 +282,7 @@ bool StorageMaterializedView::optimize( checkStatementCanBeForwarded(); auto storage_ptr = getTargetTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); - return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); + return storage_ptr->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context); } StorageID StorageMaterializedView::createFreshTable() const diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index c8a0ee541e2..b257a551d57 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -188,7 +188,7 @@ def test_grant_all_on_table(): instance.query("SHOW GRANTS FOR B") == "GRANT SHOW TABLES, SHOW COLUMNS, SHOW DICTIONARIES, SELECT, INSERT, ALTER TABLE, ALTER VIEW, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, " "DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, SHOW ROW POLICIES, " - "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " + "SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM VIEWS, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, " "SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM FLUSH DISTRIBUTED, dictGet ON test.table TO B\n" ) instance.query("REVOKE ALL ON test.table FROM B", user="A") diff --git a/tests/queries/0_stateless/02661_refreshable_materialized_views.reference b/tests/queries/0_stateless/02661_refreshable_materialized_views.reference index 663d951b8b5..bbb855348f7 100644 --- a/tests/queries/0_stateless/02661_refreshable_materialized_views.reference +++ b/tests/queries/0_stateless/02661_refreshable_materialized_views.reference @@ -1 +1,7 @@ -Scheduled Unknown +test +test +test1 +test2 +test Disabled +test1 Disabled +test2 Disabled diff --git a/tests/queries/0_stateless/02661_refreshable_materialized_views.sql b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql index 0b8a6a5289e..4e5420509c1 100644 --- a/tests/queries/0_stateless/02661_refreshable_materialized_views.sql +++ b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql @@ -1,3 +1,20 @@ -CREATE MATERIALIZED VIEW test REFRESH AFTER 15 SECOND ENGINE = MergeTree() ORDER BY number AS SELECT * FROM system.numbers LIMIT 10; +CREATE MATERIALIZED VIEW test REFRESH AFTER 15 SECOND ENGINE = MergeTree() ORDER BY number AS SELECT * FROM system.numbers LIMIT 10000000; -SELECT refresh_status, last_refresh_status FROM system.view_refreshes WHERE view = 'test'; +SELECT view FROM system.view_refreshes WHERE view = 'test'; + +CREATE MATERIALIZED VIEW test1 REFRESH EVERY 1 HOUR ENGINE = MergeTree() ORDER BY number AS SELECT * FROM test; + +CREATE MATERIALIZED VIEW test2 REFRESH EVERY 2 HOUR OFFSET 42 MINUTE 8 SECOND RANDOMIZE FOR 10 MINUTE ENGINE = MergeTree() ORDER BY number AS SELECT * FROM test; + +SELECT view FROM system.view_refreshes 
WHERE view LIKE 'test%' ORDER BY view; + +SYSTEM STOP VIEW test; +SYSTEM STOP VIEWS; + +SELECT view, refresh_status FROM system.view_refreshes WHERE view LIKE 'test%' ORDER BY view; + +SYSTEM START VIEWS; + +DROP VIEW test; +DROP VIEW test1; +DROP VIEW test2; From 5dc04a13a79c6f83600df5ec03e8d26f5717d2a2 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Sat, 18 Nov 2023 00:45:05 +0000 Subject: [PATCH 62/88] Simple review comments --- src/Common/CurrentMetrics.cpp | 2 +- src/Parsers/ASTRefreshStrategy.cpp | 17 ++++++++--------- src/Parsers/ASTTimeInterval.cpp | 8 ++++---- src/Parsers/ParserTimeInterval.cpp | 10 +++++++++- src/Storages/MaterializedView/RefreshSet.cpp | 4 ++-- src/Storages/MaterializedView/RefreshTask.cpp | 4 ++-- src/Storages/MaterializedView/RefreshTimers.cpp | 4 ++-- .../System/StorageSystemViewRefreshes.cpp | 3 +++ .../0_stateless/01271_show_privileges.reference | 1 + 9 files changed, 32 insertions(+), 21 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index fccdeaa3c57..87de19b8907 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -253,7 +253,7 @@ M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ - M(Refresh, "Number of active refreshes") \ + M(RefreshingViews, "Number of active refreshes") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Parsers/ASTRefreshStrategy.cpp b/src/Parsers/ASTRefreshStrategy.cpp index f168bcc09c1..ed8216cf4aa 100644 --- a/src/Parsers/ASTRefreshStrategy.cpp +++ b/src/Parsers/ASTRefreshStrategy.cpp @@ -22,8 +22,6 @@ ASTPtr ASTRefreshStrategy::clone() const res->set(res->settings, settings->clone()); if (dependencies) res->set(res->dependencies, dependencies->clone()); - res->interval = interval; - res->spread = spread; res->schedule_kind = schedule_kind; return res; } @@ -33,40 +31,41 @@ void ASTRefreshStrategy::formatImpl( { frame.need_parens = false; - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << "REFRESH "; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << "REFRESH " << (f_settings.hilite ? hilite_none : ""); using enum ScheduleKind; switch (schedule_kind) { case AFTER: - f_settings.ostr << "AFTER "; + f_settings.ostr << "AFTER " << (f_settings.hilite ? hilite_none : ""); interval->formatImpl(f_settings, state, frame); break; case EVERY: - f_settings.ostr << "EVERY "; + f_settings.ostr << "EVERY " << (f_settings.hilite ? hilite_none : ""); period->formatImpl(f_settings, state, frame); if (periodic_offset) { - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " OFFSET "; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " OFFSET " << (f_settings.hilite ? hilite_none : ""); periodic_offset->formatImpl(f_settings, state, frame); } break; default: + f_settings.ostr << (f_settings.hilite ? hilite_none : ""); break; } if (spread) { - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " RANDOMIZE FOR "; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " RANDOMIZE FOR " << (f_settings.hilite ? 
hilite_none : ""); spread->formatImpl(f_settings, state, frame); } if (dependencies) { - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " DEPENDS ON "; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " DEPENDS ON " << (f_settings.hilite ? hilite_none : ""); dependencies->formatImpl(f_settings, state, frame); } if (settings) { - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " SETTINGS "; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " SETTINGS " << (f_settings.hilite ? hilite_none : ""); settings->formatImpl(f_settings, state, frame); } } diff --git a/src/Parsers/ASTTimeInterval.cpp b/src/Parsers/ASTTimeInterval.cpp index 4edda531202..13e07f4ed65 100644 --- a/src/Parsers/ASTTimeInterval.cpp +++ b/src/Parsers/ASTTimeInterval.cpp @@ -15,8 +15,8 @@ ASTPtr ASTTimePeriod::clone() const void ASTTimePeriod::formatImpl(const FormatSettings & f_settings, FormatState &, FormatStateStacked frame) const { frame.need_parens = false; - f_settings.ostr << (f_settings.hilite ? hilite_none : "") << value << ' '; - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword(); + f_settings.ostr << value << ' '; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword() << (f_settings.hilite ? hilite_none : ""); } ASTPtr ASTTimeInterval::clone() const @@ -32,8 +32,8 @@ void ASTTimeInterval::formatImpl(const FormatSettings & f_settings, FormatState { if (!std::exchange(is_first, false)) f_settings.ostr << ' '; - f_settings.ostr << (f_settings.hilite ? hilite_none : "") << value << ' '; - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword(); + f_settings.ostr << value << ' '; + f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword() << (f_settings.hilite ? 
hilite_none : ""); } } diff --git a/src/Parsers/ParserTimeInterval.cpp b/src/Parsers/ParserTimeInterval.cpp index dac66883083..8168ae61a33 100644 --- a/src/Parsers/ParserTimeInterval.cpp +++ b/src/Parsers/ParserTimeInterval.cpp @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int SYNTAX_ERROR; +} + namespace { @@ -27,7 +32,10 @@ std::optional parseValKind(IParser::Pos & pos, Expected & expected) return ValKind{ .empty = true }; if (!parseIntervalKind(pos, expected, kind)) return {}; - return ValKind{ value->as().value.safeGet(), kind, false }; + UInt64 val; + if (!value->as().value.tryGet(val)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Time interval must be an integer"); + return ValKind{ val, kind, false }; } } diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index 528375bb951..978fab26af0 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -3,7 +3,7 @@ namespace CurrentMetrics { - extern const Metric Refresh; + extern const Metric RefreshingViews; } namespace DB @@ -102,7 +102,7 @@ void RefreshSet::Entry::cleanup(RefreshSet * set) } RefreshSet::RefreshSet() - : set_metric(CurrentMetrics::Refresh) + : set_metric(CurrentMetrics::RefreshingViews) {} RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 84d5bbfb84e..6ab9fcb281c 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -37,7 +37,7 @@ std::variant makeRefreshTimer(const ASTRef case AFTER: return RefreshAfterTimer{strategy.interval}; default: - throw Exception("Unknown refresh strategy kind", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown refresh strategy kind"); } } @@ -64,7 +64,7 @@ RefreshTaskHolder RefreshTask::create( if (strategy.dependencies) { if (strategy.schedule_kind != ASTRefreshStrategy::ScheduleKind::AFTER) - throw Exception("Dependencies are allowed only for AFTER refresh kind", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dependencies are allowed only for AFTER refresh kind"); task->deps_entries.reserve(strategy.dependencies->children.size()); for (auto && dependency : strategy.dependencies->children) diff --git a/src/Storages/MaterializedView/RefreshTimers.cpp b/src/Storages/MaterializedView/RefreshTimers.cpp index 973eba46057..ebef561fc29 100644 --- a/src/Storages/MaterializedView/RefreshTimers.cpp +++ b/src/Storages/MaterializedView/RefreshTimers.cpp @@ -116,7 +116,7 @@ std::chrono::sys_seconds RefreshEveryTimer::alignedToYears(std::chrono::system_c if (auto prev_time = offset.after(prev_years); prev_time > tp) return prev_time; - auto next_years = normalize_years(tp_ymd.year() + std::chrono::years{1}); + auto next_years = normalize_years(std::chrono::year((int(tp_ymd.year()) / value + 1) * value)); return offset.after(next_years); } @@ -234,7 +234,7 @@ std::chrono::sys_seconds RefreshEveryTimer::alignedToSeconds(std::chrono::system auto tp_minutes = std::chrono::floor(tp); auto tp_seconds = std::chrono::floor(tp - tp_minutes); - auto next_seconds= (tp_seconds / value + 1s) * value; + auto next_seconds = (tp_seconds / value + 1s) * value; if (std::chrono::floor(next_seconds - 1s) > 0min) return tp_minutes + 1min + std::chrono::seconds{value}; return tp_minutes + next_seconds; diff --git 
a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index e78059e1a88..f30e2863c4a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -1,6 +1,9 @@ #include #include +#include +#include +#include #include #include diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index e2c0655b2bc..b482cf7bd13 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -127,6 +127,7 @@ SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','STA SYSTEM MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM SYSTEM PULLING REPLICATION LOG ['SYSTEM STOP PULLING REPLICATION LOG','SYSTEM START PULLING REPLICATION LOG'] TABLE SYSTEM SYSTEM CLEANUP ['SYSTEM STOP CLEANUP','SYSTEM START CLEANUP'] TABLE SYSTEM +SYSTEM VIEWS ['SYSTEM REFRESH VIEW','SYSTEM START VIEWS','SYSTEM STOP VIEWS','SYSTEM START VIEW','SYSTEM STOP VIEW','SYSTEM CANCEL VIEW','SYSTEM PAUSE VIEW','SYSTEM RESUME VIEW','REFRESH VIEW','START VIEWS','STOP VIEWS','START VIEW','STOP VIEW','CANCEL VIEW','PAUSE VIEW','RESUME VIEW'] VIEW SYSTEM SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM From 01345981e286e7601a2ad22516af7d9f90922598 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 23 Nov 2023 05:08:44 +0000 Subject: [PATCH 63/88] Overhaul RefreshTask --- src/Interpreters/ActionLocksManager.cpp | 2 +- src/Interpreters/InterpreterSystemQuery.cpp | 8 +- .../MaterializedView/RefreshAllCombiner.h | 1 + src/Storages/MaterializedView/RefreshSet.cpp | 12 +- src/Storages/MaterializedView/RefreshSet.h | 40 +- src/Storages/MaterializedView/RefreshTask.cpp | 419 +++++++++++------- src/Storages/MaterializedView/RefreshTask.h | 141 +++--- src/Storages/StorageMaterializedView.cpp | 3 +- .../System/StorageSystemViewRefreshes.cpp | 4 +- .../02661_refreshable_materialized_views.sql | 2 +- 10 files changed, 347 insertions(+), 285 deletions(-) diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index 43b49b024aa..65f13ebd66c 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -18,7 +18,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsMove = 7; extern const StorageActionBlockType PullReplicationLog = 8; extern const StorageActionBlockType Cleanup = 9; - extern const StorageActionBlockType ViewRefresh = 8; + extern const StorageActionBlockType ViewRefresh = 10; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 2f504e97857..7df6499fd39 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -609,16 +609,12 @@ BlockIO InterpreterSystemQuery::execute() case Type::START_CLEANUP: startStopAction(ActionLocks::Cleanup, true); break; + case Type::START_VIEW: case Type::START_VIEWS: startStopAction(ActionLocks::ViewRefresh, true); break; - case 
Type::STOP_VIEWS: - startStopAction(ActionLocks::ViewRefresh, false); - break; - case Type::START_VIEW: - startStopAction(ActionLocks::ViewRefresh, true); - break; case Type::STOP_VIEW: + case Type::STOP_VIEWS: startStopAction(ActionLocks::ViewRefresh, false); break; case Type::REFRESH_VIEW: diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.h b/src/Storages/MaterializedView/RefreshAllCombiner.h index 3b74018f6cf..f9f3a8d319c 100644 --- a/src/Storages/MaterializedView/RefreshAllCombiner.h +++ b/src/Storages/MaterializedView/RefreshAllCombiner.h @@ -7,6 +7,7 @@ namespace DB /// Concurrent primitive for dependency completeness registration /// When arrive methods return true, dependent task must be executed (or scheduled) +/// TODO: Doesn't need to be thread safe. class RefreshAllCombiner { public: diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index 978fab26af0..9efd82e1afc 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -9,7 +9,7 @@ namespace CurrentMetrics namespace DB { -RefreshSetElement::RefreshSetElement(RefreshTaskHolder task, StorageID id) +RefreshSetElement::RefreshSetElement(StorageID id, RefreshTaskHolder task) : corresponding_task(task) , view_id(std::move(id)) {} @@ -19,8 +19,8 @@ RefreshInfo RefreshSetElement::getInfo() const return { .database = view_id.getDatabaseName(), .view_name = view_id.getTableName(), - .refresh_status = toString(RefreshTask::TaskState{state.load()}), - .last_refresh_status = toString(RefreshTask::LastTaskState{last_state.load()}), + .refresh_status = toString(RefreshTask::RefreshState{state.load()}), + .last_refresh_result = toString(RefreshTask::LastTaskResult{last_result.load()}), .last_refresh_time = static_cast(last_s.load(std::memory_order_relaxed)), .next_refresh_time = static_cast(next_s.load(std::memory_order_relaxed)), .progress = static_cast(written_rows) / total_rows_to_read, @@ -108,8 +108,8 @@ RefreshSet::RefreshSet() RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const { std::lock_guard lock(elements_mutex); - if (auto element = elements.find(id); element != elements.end()) - return element->getTask(); + if (auto element = elements.find(id.uuid); element != elements.end()) + return element->second.getTask(); return nullptr; } @@ -119,7 +119,7 @@ RefreshSet::InfoContainer RefreshSet::getInfo() const InfoContainer res; res.reserve(elements.size()); for (auto && element : elements) - res.emplace_back(element.getInfo()); + res.emplace_back(element.second.getInfo()); return res; } diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index 255fa4dbd66..cc5b0006218 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -14,7 +14,7 @@ struct RefreshInfo String database; String view_name; String refresh_status; - String last_refresh_status; + String last_refresh_result; UInt32 last_refresh_time; UInt32 next_refresh_time; Float64 progress; @@ -33,7 +33,7 @@ class RefreshSetElement { friend class RefreshTask; public: - RefreshSetElement(RefreshTaskHolder task, StorageID id); + RefreshSetElement(StorageID id, RefreshTaskHolder task); RefreshSetElement(const RefreshSetElement &) = delete; RefreshSetElement & operator=(const RefreshSetElement &) = delete; @@ -48,19 +48,19 @@ private: RefreshTaskObserver corresponding_task; StorageID view_id; - mutable std::atomic read_rows{0}; - mutable std::atomic 
read_bytes{0}; - mutable std::atomic total_rows_to_read{0}; - mutable std::atomic total_bytes_to_read{0}; - mutable std::atomic written_rows{0}; - mutable std::atomic written_bytes{0}; - mutable std::atomic result_rows{0}; - mutable std::atomic result_bytes{0}; - mutable std::atomic elapsed_ns{0}; - mutable std::atomic last_s{0}; - mutable std::atomic next_s{0}; - mutable std::atomic state{0}; - mutable std::atomic last_state{0}; + std::atomic read_rows{0}; + std::atomic read_bytes{0}; + std::atomic total_rows_to_read{0}; + std::atomic total_bytes_to_read{0}; + std::atomic written_rows{0}; + std::atomic written_bytes{0}; + std::atomic result_rows{0}; + std::atomic result_bytes{0}; + std::atomic elapsed_ns{0}; + std::atomic last_s{0}; + std::atomic next_s{0}; + std::atomic state{0}; + std::atomic last_result{0}; }; struct RefreshSetLess @@ -77,7 +77,7 @@ struct RefreshSetLess class RefreshSet { private: - using Container = std::set; + using Container = std::map; using ContainerIter = typename Container::iterator; public: @@ -92,7 +92,7 @@ public: ~Entry(); - const RefreshSetElement * operator->() const { return std::to_address(iter); } + RefreshSetElement * operator->() { return &iter->second; } private: RefreshSet * parent_set; @@ -111,11 +111,11 @@ public: RefreshSet(); - template - std::optional emplace(Args &&... args) + std::optional emplace(StorageID id, RefreshTaskHolder task) { std::lock_guard guard(elements_mutex); - if (auto [it, is_inserted] = elements.emplace(std::forward(args)...); is_inserted) + auto [it, is_inserted] = elements.emplace(std::piecewise_construct, std::forward_as_tuple(id.uuid), std::forward_as_tuple(id, std::move(task))); + if (is_inserted) return Entry(*this, std::move(it), set_metric); return {}; } diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 6ab9fcb281c..0a85f533a27 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -45,11 +45,9 @@ std::variant makeRefreshTimer(const ASTRef RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) - : refresh_timer(makeRefreshTimer(strategy)) + : log(&Poco::Logger::get("RefreshTask")) + , refresh_timer(makeRefreshTimer(strategy)) , refresh_spread{makeSpreadDistribution(strategy.spread)} - , canceled{false} - , refresh_immediately{false} - , interrupt_execution{false} {} RefreshTaskHolder RefreshTask::create( @@ -59,8 +57,13 @@ RefreshTaskHolder RefreshTask::create( { auto task = std::make_shared(strategy); - task->refresh_task = context->getSchedulePool().createTask("MaterializedViewRefresherTask", task->makePoolTask()); - task->set_entry = context->getRefreshSet().emplace(task, view.getStorageID()).value(); + task->refresh_task = context->getSchedulePool().createTask("MaterializedViewRefresherTask", + [self = task->weak_from_this()] + { + if (auto t = self.lock()) + t->refreshTask(); + }); + task->set_entry = context->getRefreshSet().emplace(view.getStorageID(), task).value(); if (strategy.dependencies) { if (strategy.schedule_kind != ASTRefreshStrategy::ScheduleKind::AFTER) @@ -70,153 +73,224 @@ RefreshTaskHolder RefreshTask::create( for (auto && dependency : strategy.dependencies->children) { StorageID dep_id(dependency->as()); + /// TODO: + /// * This depends on the order in which different tables are initialized. + /// Is the order guaranteed on startup? + /// * At what point does the table name from the query get mapped to the table's UUID? + /// Does it work at all? Is it reliable? 
+ /// * Don't silently ignore if the table is missing. if (auto dep_task = context->getRefreshSet().getTask(dep_id)) - task->deps_entries.push_back(dep_task->dependencies.add(task)); + task->deps_entries.push_back(dep_task->dependencies.add(task)); } + + /// TODO: Initialize combiner. } return task; } -void RefreshTask::initialize(std::shared_ptr view) +void RefreshTask::initializeAndStart(std::shared_ptr view) { view_to_refresh = view; + /// TODO: Add a setting to stop views on startup, set `stop_requested = true` in that case. + calculateNextRefreshTime(std::chrono::system_clock::now()); + refresh_task->schedule(); } void RefreshTask::start() { - storeState(TaskState::Scheduled); - refresh_task->activateAndSchedule(); + std::lock_guard guard(mutex); + if (!std::exchange(stop_requested, false)) + return; + refresh_task->schedule(); } void RefreshTask::stop() { - refresh_task->deactivate(); - cancelSync(); - storeState(TaskState::Disabled); + std::lock_guard guard(mutex); + if (std::exchange(stop_requested, true)) + return; + interrupt_execution.store(true); + refresh_task->schedule(); } void RefreshTask::run() { - refresh_immediately.store(true); - refresh_task->activateAndSchedule(); + std::lock_guard guard(mutex); + if (std::exchange(refresh_immediately, true)) + return; + refresh_task->schedule(); } void RefreshTask::cancel() { - std::lock_guard guard(state_mutex); - cancelLocked(); -} - -void RefreshTask::cancelSync() -{ - std::unique_lock lock(state_mutex); - cancelLocked(); - sync_canceled.wait(lock, [this] { return !canceled; }); + std::lock_guard guard(mutex); + if (std::exchange(cancel_requested, true)) + return; + interrupt_execution.store(true); + refresh_task->schedule(); } void RefreshTask::pause() { - std::lock_guard guard(state_mutex); - if (state == TaskState::Running) - { - interrupt_execution.store(true); - state = TaskState::Paused; - } + std::lock_guard guard(mutex); + if (std::exchange(pause_requested, true)) + return; + interrupt_execution.store(true); + refresh_task->schedule(); } void RefreshTask::resume() { - std::lock_guard guard(state_mutex); - if (state == TaskState::Paused) - { - refresh_immediately.store(true); - refresh_task->schedule(); - state = TaskState::Scheduled; - } + std::lock_guard guard(mutex); + if (!std::exchange(pause_requested, false)) + return; + refresh_task->schedule(); } void RefreshTask::notify(const StorageID & parent_id) { - if (combiner.arriveParent(parent_id)) - { - refresh_immediately.store(true); - refresh_task->schedule(); - } -} - -void RefreshTask::doRefresh() -{ - if (refresh_immediately.exchange(false)) - { - refresh(); - } - else - { - auto now = std::chrono::system_clock::now(); - if (now >= next_refresh) - { - if (combiner.arriveTime()) - refresh(); - } - else - scheduleRefresh(now); - } -} - -void RefreshTask::refresh() -{ - auto view = lockView(); - if (!view) + std::lock_guard guard(mutex); + if (!combiner.arriveParent(parent_id)) return; - - std::unique_lock lock(state_mutex); - - if (!refresh_executor) - initializeRefresh(view); - - storeState(TaskState::Running); - - switch (executeRefresh(lock)) - { - case ExecutionResult::Paused: - pauseRefresh(view); - return; - case ExecutionResult::Finished: - completeRefresh(view); - storeLastState(LastTaskState::Finished); - break; - case ExecutionResult::Cancelled: - cancelRefresh(view); - storeLastState(LastTaskState::Canceled); - break; - } - - cleanState(); - - storeLastRefresh(std::chrono::system_clock::now()); - scheduleRefresh(last_refresh); + if 
(std::exchange(refresh_immediately, true)) + return; + refresh_task->schedule(); } -RefreshTask::ExecutionResult RefreshTask::executeRefresh(std::unique_lock & state_lock) +void RefreshTask::refreshTask() { - state_lock.unlock(); + try + { + std::unique_lock lock(mutex); - bool not_finished{true}; - while (!interrupt_execution.load() && not_finished) - not_finished = refresh_executor->executeStep(interrupt_execution); + /// Whoever breaks out of this loop should call reportState() first. + while (true) + { + chassert(lock.owns_lock()); - state_lock.lock(); - if (!not_finished) - return ExecutionResult::Finished; - if (interrupt_execution.load() && !canceled) - return ExecutionResult::Paused; - return ExecutionResult::Cancelled; + interrupt_execution.store(false); + /// Discard the active refresh if requested. + if ((stop_requested || cancel_requested) && refresh_executor) + { + lock.unlock(); + cancelRefresh(LastTaskResult::Canceled); + lock.lock(); + + if (cancel_requested) + { + /// Advance to the next refresh time according to schedule. + /// Otherwise we'd start another refresh immediately after canceling this one. + auto now = std::chrono::system_clock::now(); + if (now >= next_refresh_with_spread) + calculateNextRefreshTime(std::chrono::system_clock::now()); + } + } + + cancel_requested = false; + + if (pause_requested && !refresh_executor) + pause_requested = false; // no refresh to pause + + if (stop_requested || pause_requested) + { + /// Exit the task and wait for the user to start or resume, which will schedule the task again. + reportState(stop_requested ? RefreshState::Disabled : RefreshState::Paused); + break; + } + + if (!refresh_immediately && !refresh_executor) + { + auto now = std::chrono::system_clock::now(); + if (now >= next_refresh_with_spread) + { + if (combiner.arriveTime()) + refresh_immediately = true; + else + { + /// TODO: Put the array of remaining dependencies in RefreshSet, report it in the system table (update it from notify() too). + reportState(RefreshState::WaitingForDependencies); + break; + } + } + else + { + refresh_task->scheduleAfter( + std::chrono::duration_cast( + next_refresh_with_spread - now).count()); + reportState(RefreshState::Scheduled); + break; + } + } + + /// Perform a refresh. + + refresh_immediately = false; + + auto view = lockView(); + if (!view) + { + /// The view was dropped. This RefreshTask should be destroyed soon too. + /// (Maybe this is unreachable.) + reportState(RefreshState::Disabled); + break; + } + + reportState(RefreshState::Running); + + lock.unlock(); + + bool finished = false; + + try + { + if (!refresh_executor) + initializeRefresh(view); + + finished = executeRefresh(); + + if (finished) + completeRefresh(view, LastTaskResult::Finished); + } + catch (...) + { + tryLogCurrentException(log, "Refresh failed"); + + /// Don't leave a trash table. + if (!finished && refresh_query) + cancelRefresh(LastTaskResult::Exception); + + /// TODO: Put the exception message into RefreshSet, report it in the system table. + /// TODO: Backoff. Maybe just assigning next_refresh_* will work. + } + + lock.lock(); + + if (finished) + { + auto now = std::chrono::system_clock::now(); + reportLastRefreshTime(now); + calculateNextRefreshTime(now); + } + } + } + catch (...) + { + std::unique_lock lock(mutex); + stop_requested = true; + tryLogCurrentException(log, + "Unexpected exception in refresh scheduling, please investigate. 
The view will be stopped."); +#ifdef ABORT_ON_LOGICAL_ERROR + abortOnFailedAssertion("Unexpected exception in refresh scheduling"); +#endif + } } void RefreshTask::initializeRefresh(std::shared_ptr view) { + chassert(!refresh_query); + auto fresh_table = view->createFreshTable(); refresh_query = view->prepareRefreshQuery(); refresh_query->setTable(fresh_table.table_name); @@ -228,37 +302,53 @@ void RefreshTask::initializeRefresh(std::shared_ptrpipeline); } -void RefreshTask::completeRefresh(std::shared_ptr view) +bool RefreshTask::executeRefresh() +{ + bool not_finished{true}; + while (!interrupt_execution.load() && not_finished) + not_finished = refresh_executor->executeStep(interrupt_execution); + + return !not_finished; +} + +void RefreshTask::completeRefresh(std::shared_ptr view, LastTaskResult result) { auto stale_table = view->exchangeTargetTable(refresh_query->table_id); dependencies.notifyAll(view->getStorageID()); auto drop_context = Context::createCopy(view->getContext()); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, stale_table, /*sync=*/true); + + cleanState(); + reportLastResult(result); } -void RefreshTask::cancelRefresh(std::shared_ptr view) +void RefreshTask::cancelRefresh(LastTaskResult result) { - auto drop_context = Context::createCopy(view->getContext()); - InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, refresh_query->table_id, /*sync=*/true); - interrupt_execution.store(false); - if (std::exchange(canceled, false)) - sync_canceled.notify_all(); + if (auto view = lockView()) + { + try + { + auto drop_context = Context::createCopy(view->getContext()); + InterpreterDropQuery::executeDropQuery( + ASTDropQuery::Kind::Drop, drop_context, drop_context, refresh_query->table_id, /*sync=*/true); + } + catch (...) + { + tryLogCurrentException(log, "Failed to drop temporary table after a failed refresh"); + /// Let's ignore this and keep going, at risk of accumulating many trash tables if this keeps happening. + } + } + + cleanState(); + reportLastResult(result); } -void RefreshTask::pauseRefresh(std::shared_ptr /*view*/) +void RefreshTask::cleanState() { - interrupt_execution.store(false); -} - -void RefreshTask::scheduleRefresh(std::chrono::system_clock::time_point now) -{ - using namespace std::chrono_literals; - auto scheduled_refresh = calculateRefreshTime(now) + genSpreadSeconds(); - storeNextRefresh(scheduled_refresh); - auto schedule_time = std::chrono::ceil(scheduled_refresh - now); - storeState(TaskState::Scheduled); - refresh_task->scheduleAfter(std::max(schedule_time, 0ms).count()); + refresh_executor.reset(); + refresh_block.reset(); + refresh_query.reset(); } namespace @@ -271,17 +361,38 @@ CombinedVisitor(Ts...) -> CombinedVisitor; } -std::chrono::sys_seconds RefreshTask::calculateRefreshTime(std::chrono::system_clock::time_point now) const +void RefreshTask::calculateNextRefreshTime(std::chrono::system_clock::time_point now) { - CombinedVisitor refresh_time_visitor{ - [now](const RefreshAfterTimer & timer) { return timer.after(now); }, - [now](const RefreshEveryTimer & timer) { return timer.next(now); }}; - return std::visit(std::move(refresh_time_visitor), refresh_timer); + /// TODO: Add a setting to randomize initial delay in case of AFTER, for the case when the server + /// is restarted more often than the refresh period. + /// TODO: Maybe do something like skip_update_after_seconds and skip_update_after_ratio. 
+ /// Unclear if that's useful at all if the last refresh timestamp is not remembered across restarts. + + auto advance = [&](std::chrono::system_clock::time_point t) + { + CombinedVisitor refresh_time_visitor{ + [t](const RefreshAfterTimer & timer) { return timer.after(t); }, + [t](const RefreshEveryTimer & timer) { return timer.next(t); }}; + auto r = std::visit(std::move(refresh_time_visitor), refresh_timer); + chassert(r > t); + return r; + }; + + /// It's important to use time without spread here, otherwise we would do multiple refreshes instead + /// of one, if the generated spread is negative and the first refresh completes faster than the spread. + std::chrono::sys_seconds next = advance(next_refresh_without_spread); + if (next < now) + next = advance(now); // fell behind, skip to current time + + next_refresh_without_spread = next; + next_refresh_with_spread = next + std::chrono::seconds{refresh_spread(thread_local_rng)}; + + reportNextRefreshTime(next_refresh_with_spread); } -std::chrono::seconds RefreshTask::genSpreadSeconds() +std::shared_ptr RefreshTask::lockView() { - return std::chrono::seconds{refresh_spread(thread_local_rng)}; + return std::static_pointer_cast(view_to_refresh.lock()); } void RefreshTask::progressCallback(const Progress & progress) @@ -297,58 +408,24 @@ void RefreshTask::progressCallback(const Progress & progress) set_entry->elapsed_ns.store(progress.elapsed_ns, std::memory_order_relaxed); } -void RefreshTask::cancelLocked() +void RefreshTask::reportState(RefreshState s) { - switch (state) - { - case TaskState::Running: - canceled = true; - interrupt_execution.store(true); - break; - case TaskState::Paused: - if (auto view = lockView()) - cancelRefresh(view); - cleanState(); - break; - default: - break; - } + set_entry->state.store(static_cast(s)); } -void RefreshTask::cleanState() +void RefreshTask::reportLastResult(LastTaskResult r) { - refresh_executor.reset(); - refresh_block.reset(); - refresh_query.reset(); + set_entry->last_result.store(static_cast(r)); } -std::shared_ptr RefreshTask::lockView() +void RefreshTask::reportLastRefreshTime(std::chrono::system_clock::time_point last) { - return std::static_pointer_cast(view_to_refresh.lock()); -} - -void RefreshTask::storeState(TaskState task_state) -{ - state = task_state; - set_entry->state.store(static_cast(task_state)); -} - -void RefreshTask::storeLastState(LastTaskState task_state) -{ - last_state = task_state; - set_entry->last_state.store(static_cast(task_state)); -} - -void RefreshTask::storeLastRefresh(std::chrono::system_clock::time_point last) -{ - last_refresh = last; auto secs = std::chrono::floor(last); set_entry->last_s.store(secs.time_since_epoch().count()); } -void RefreshTask::storeNextRefresh(std::chrono::system_clock::time_point next) +void RefreshTask::reportNextRefreshTime(std::chrono::system_clock::time_point next) { - next_refresh = next; auto secs = std::chrono::floor(next); set_entry->next_s.store(secs.time_since_epoch().count()); } diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 894b7f2fea0..192a4776be0 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -22,18 +22,22 @@ class ASTRefreshStrategy; class RefreshTask : public std::enable_shared_from_this { public: - enum class TaskState : RefreshTaskStateUnderlying + /// Just for observability. 
+ enum class RefreshState : RefreshTaskStateUnderlying { Disabled = 0, Scheduled, + WaitingForDependencies, Running, Paused }; - enum class LastTaskState : RefreshTaskStateUnderlying + /// Just for observability. + enum class LastTaskResult : RefreshTaskStateUnderlying { Unknown = 0, Canceled, + Exception, Finished }; @@ -46,7 +50,7 @@ public: ContextMutablePtr context, const DB::ASTRefreshStrategy & strategy); - void initialize(std::shared_ptr view); + void initializeAndStart(std::shared_ptr view); /// Enable task scheduling void start(); @@ -60,9 +64,6 @@ public: /// Cancel task execution void cancel(); - /// Cancel task execution synchronously - void cancelSync(); - /// Pause task execution (must be either resumed or canceled later) void pause(); @@ -73,64 +74,16 @@ public: void notify(const StorageID & parent_id); private: - enum class ExecutionResult : UInt8 - { - Finished, - Paused, - Cancelled - }; - - void doRefresh(); - - void scheduleRefresh(std::chrono::system_clock::time_point now); - - void refresh(); - - ExecutionResult executeRefresh(std::unique_lock & state_lock); - - void initializeRefresh(std::shared_ptr view); - - void completeRefresh(std::shared_ptr view); - - void cancelRefresh(std::shared_ptr view); - - void pauseRefresh(std::shared_ptr view); - - std::chrono::sys_seconds calculateRefreshTime(std::chrono::system_clock::time_point now) const; - - std::chrono::seconds genSpreadSeconds(); - - void progressCallback(const Progress & progress); - - auto makePoolTask() - { - return [self = this->weak_from_this()] - { - if (auto task = self.lock()) - task->doRefresh(); - }; - } - - void cancelLocked(); - - void cleanState(); - - std::shared_ptr lockView(); - - void storeState(TaskState task_state); - - void storeLastState(LastTaskState task_state); - - void storeLastRefresh(std::chrono::system_clock::time_point last); - - void storeNextRefresh(std::chrono::system_clock::time_point next); - - /// Task ownership - BackgroundSchedulePool::TaskHolder refresh_task; + Poco::Logger * log = nullptr; std::weak_ptr view_to_refresh; RefreshSet::Entry set_entry; - /// Task execution + /// Refresh schedule + std::variant refresh_timer; + std::uniform_int_distribution refresh_spread; + + /// Task execution. Non-empty iff a refresh is in progress (possibly paused). + /// Whoever unsets these should also call storeLastState(). std::optional refresh_executor; std::optional refresh_block; std::shared_ptr refresh_query; @@ -140,24 +93,60 @@ private: RefreshDependencies dependencies; std::vector deps_entries; - /// Refresh time settings and data - std::chrono::system_clock::time_point last_refresh; - std::chrono::system_clock::time_point next_refresh; - std::variant refresh_timer; + /// Protects all fields below (they're accessed by both refreshTask() and public methods). + /// Never locked for blocking operations (e.g. creating or dropping the internal table). + std::mutex mutex; - /// Refresh time randomization - std::uniform_int_distribution refresh_spread; + /// Refreshes are stopped (e.g. by SYSTEM STOP VIEW). + bool stop_requested = false; + /// Refreshes are paused (e.g. by SYSTEM PAUSE VIEW). + bool pause_requested = false; + /// Cancel current refresh, then reset this flag. + bool cancel_requested = false; - /// Task state - std::mutex state_mutex; - std::condition_variable sync_canceled; - TaskState state{TaskState::Disabled}; - LastTaskState last_state{LastTaskState::Unknown}; - bool canceled; + /// If true, we should start a refresh right away. All refreshes go through this flag. 
+ bool refresh_immediately = false; - /// Outer triggers - std::atomic_bool refresh_immediately; - std::atomic_bool interrupt_execution; + /// If true, the refresh task should interrupt its query execution and reconsider what to do, + /// re-checking `stop_requested`, `cancel_requested`, etc. + std::atomic_bool interrupt_execution {false}; + + /// When to refresh next. Updated when a refresh is finished or canceled. + std::chrono::system_clock::time_point next_refresh_without_spread; + std::chrono::system_clock::time_point next_refresh_with_spread; + + /// Calls refreshTask() from background thread. + BackgroundSchedulePool::TaskHolder refresh_task; + + /// The main loop of the refresh task. It examines the state, sees what needs to be + /// done and does it. If there's nothing to do at the moment, returns; it's then scheduled again, + /// when needed, by public methods or by timer. + /// + /// Public methods just provide inputs for the refreshTask()'s decisions + /// (e.g. stop_requested, cancel_requested), they don't do anything significant themselves. + /// This adds some inefficiency: even trivial or no-op requests have to schedule a background + /// task instead of directly performing the operation; but the simplicity seems worth it, I had + /// a really hard time trying to organize this code in any other way. + void refreshTask(); + + /// Methods that do the actual work: creating/dropping internal table, executing the query. + void initializeRefresh(std::shared_ptr view); + bool executeRefresh(); + void completeRefresh(std::shared_ptr view, LastTaskResult result); + void cancelRefresh(LastTaskResult result); + void cleanState(); + + /// Assigns next_refresh_* + void calculateNextRefreshTime(std::chrono::system_clock::time_point now); + + std::shared_ptr lockView(); + + /// Methods that push information to RefreshSet, for observability. 
+ void progressCallback(const Progress & progress); + void reportState(RefreshState s); + void reportLastResult(LastTaskResult r); + void reportLastRefreshTime(std::chrono::system_clock::time_point last); + void reportNextRefreshTime(std::chrono::system_clock::time_point next); }; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 9cb21bdb06f..4f2ffb38017 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -444,8 +444,7 @@ void StorageMaterializedView::startup() if (refresher) { - refresher->initialize(std::static_pointer_cast(shared_from_this())); - refresher->start(); + refresher->initializeAndStart(std::static_pointer_cast(shared_from_this())); } } diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index f30e2863c4a..1e763f30991 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -17,7 +17,7 @@ NamesAndTypesList StorageSystemViewRefreshes::getNamesAndTypes() {"database", std::make_shared()}, {"view", std::make_shared()}, {"refresh_status", std::make_shared()}, - {"last_refresh_status", std::make_shared()}, + {"last_refresh_result", std::make_shared()}, {"last_refresh_time", std::make_shared()}, {"next_refresh_time", std::make_shared()}, {"progress", std::make_shared()}, @@ -51,7 +51,7 @@ void StorageSystemViewRefreshes::fillData( res_columns[i++]->insert(refresh.database); res_columns[i++]->insert(refresh.view_name); res_columns[i++]->insert(refresh.refresh_status); - res_columns[i++]->insert(refresh.last_refresh_status); + res_columns[i++]->insert(refresh.last_refresh_result); res_columns[i++]->insert(refresh.last_refresh_time); res_columns[i++]->insert(refresh.next_refresh_time); res_columns[i++]->insert(refresh.progress); diff --git a/tests/queries/0_stateless/02661_refreshable_materialized_views.sql b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql index 4e5420509c1..531000b3fe6 100644 --- a/tests/queries/0_stateless/02661_refreshable_materialized_views.sql +++ b/tests/queries/0_stateless/02661_refreshable_materialized_views.sql @@ -4,7 +4,7 @@ SELECT view FROM system.view_refreshes WHERE view = 'test'; CREATE MATERIALIZED VIEW test1 REFRESH EVERY 1 HOUR ENGINE = MergeTree() ORDER BY number AS SELECT * FROM test; -CREATE MATERIALIZED VIEW test2 REFRESH EVERY 2 HOUR OFFSET 42 MINUTE 8 SECOND RANDOMIZE FOR 10 MINUTE ENGINE = MergeTree() ORDER BY number AS SELECT * FROM test; +CREATE MATERIALIZED VIEW test2 REFRESH EVERY 2 HOUR OFFSET 42 MINUTE 8 SECOND RANDOMIZE FOR 10 MINUTE ENGINE = MergeTree() ORDER BY number AS SELECT * FROM test; SELECT view FROM system.view_refreshes WHERE view LIKE 'test%' ORDER BY view; From 01369a0a8a1742679de85a7e507980b2ab610f3b Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 24 Nov 2023 01:32:45 +0000 Subject: [PATCH 64/88] Overhaul dependencies --- src/Common/CurrentMetrics.cpp | 3 +- src/Databases/TablesDependencyGraph.h | 2 +- src/Interpreters/AddDefaultDatabaseVisitor.h | 13 ++ src/Interpreters/InterpreterCreateQuery.cpp | 9 + src/Parsers/ParserRefreshStrategy.cpp | 6 +- .../MaterializedView/RefreshAllCombiner.cpp | 58 ------ .../MaterializedView/RefreshAllCombiner.h | 34 --- .../MaterializedView/RefreshDependencies.cpp | 60 ------ .../MaterializedView/RefreshDependencies.h | 56 ----- src/Storages/MaterializedView/RefreshSet.cpp | 100 +++++---- 
src/Storages/MaterializedView/RefreshSet.h | 66 +++--- src/Storages/MaterializedView/RefreshTask.cpp | 193 +++++++++++------- src/Storages/MaterializedView/RefreshTask.h | 23 ++- .../MaterializedView/RefreshTimers.cpp | 61 +++++- src/Storages/MaterializedView/RefreshTimers.h | 19 ++ src/Storages/StorageMaterializedView.cpp | 2 +- 16 files changed, 334 insertions(+), 371 deletions(-) delete mode 100644 src/Storages/MaterializedView/RefreshAllCombiner.cpp delete mode 100644 src/Storages/MaterializedView/RefreshAllCombiner.h delete mode 100644 src/Storages/MaterializedView/RefreshDependencies.cpp delete mode 100644 src/Storages/MaterializedView/RefreshDependencies.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 87de19b8907..2613e9ec116 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -253,7 +253,8 @@ M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \ M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ - M(RefreshingViews, "Number of active refreshes") \ + M(RefreshableViews, "Number of materialized views with periodic refreshing (REFRESH)") \ + M(RefreshingViews, "Number of materialized views currently executing a refresh") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Databases/TablesDependencyGraph.h b/src/Databases/TablesDependencyGraph.h index e71d5ecc5fc..50be3bbf969 100644 --- a/src/Databases/TablesDependencyGraph.h +++ b/src/Databases/TablesDependencyGraph.h @@ -60,7 +60,7 @@ public: /// Removes all dependencies of "table_id", returns those dependencies. std::vector removeDependencies(const StorageID & table_id, bool remove_isolated_tables = false); - /// Removes a table from the graph and removes all references to in from the graph (both from its dependencies and dependents). + /// Removes a table from the graph and removes all references to it from the graph (both from its dependencies and dependents). bool removeTable(const StorageID & table_id); /// Removes tables from the graph by a specified filter.
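Patch 64's core move is visible in the diffstat above: RefreshAllCombiner and RefreshDependencies are deleted, and their job is folded into RefreshSet as a single reverse map from each table to the set of views that depend on it, guarded by one mutex (see the RefreshSet.cpp/.h hunks below). A minimal standalone sketch of that bookkeeping, with plain std::string identifiers standing in for StorageID and the class name invented for illustration:

#include <map>
#include <mutex>
#include <set>
#include <string>
#include <vector>

class DependentsRegistry
{
public:
    /// Register `view` as depending on each table in its DEPENDS ON list.
    void add(const std::string & view, const std::vector<std::string> & deps)
    {
        std::lock_guard<std::mutex> lock(mutex);
        for (const auto & dep : deps)
            dependents[dep].insert(view);
    }

    /// Unregister `view`, pruning empty buckets so dropped tables don't leave dead keys.
    void remove(const std::string & view, const std::vector<std::string> & deps)
    {
        std::lock_guard<std::mutex> lock(mutex);
        for (const auto & dep : deps)
        {
            auto it = dependents.find(dep);
            if (it == dependents.end())
                continue;
            it->second.erase(view);
            if (it->second.empty())
                dependents.erase(it);
        }
    }

    /// Called when `table` completes a refresh: the views returned here get notified.
    std::vector<std::string> getDependents(const std::string & table) const
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto it = dependents.find(table);
        if (it == dependents.end())
            return {};
        return {it->second.begin(), it->second.end()};
    }

private:
    mutable std::mutex mutex;
    std::map<std::string, std::set<std::string>> dependents;
};

The reverse map is what makes completeRefresh() cheap: a finished refresh does one lookup and notifies the resulting tasks, instead of each view maintaining its own observer list as the deleted RefreshDependencies did.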
diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 27639c4b813..e6354467938 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -87,6 +88,12 @@ public: visit(child); } + void visit(ASTRefreshStrategy & refresh) const + { + ASTPtr unused; + visit(refresh, unused); + } + private: ContextPtr context; @@ -229,6 +236,12 @@ private: } } + void visit(ASTRefreshStrategy & refresh, ASTPtr &) const + { + for (auto & table : refresh.children) + tryVisit(table); + } + void visitChildren(IAST & ast) const { for (auto & child : ast.children) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1eadb325e95..801a46f4167 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1210,6 +1210,15 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) visitor.visit(*create.select); } + if (create.refresh_strategy) + { + /// TODO: This doesn't work for some reason. + AddDefaultDatabaseVisitor visitor(getContext(), current_database); + visitor.visit(*create.refresh_strategy); + + /// TODO: For DEPENDS ON, check that the specified tables exist. + } + if (create.columns_list) { AddDefaultDatabaseVisitor visitor(getContext(), current_database); diff --git a/src/Parsers/ParserRefreshStrategy.cpp b/src/Parsers/ParserRefreshStrategy.cpp index a6fbb373ed3..05dd081e61d 100644 --- a/src/Parsers/ParserRefreshStrategy.cpp +++ b/src/Parsers/ParserRefreshStrategy.cpp @@ -54,10 +54,12 @@ bool ParserRefreshStrategy::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (ParserKeyword{"DEPENDS ON"}.ignore(pos, expected)) { ASTPtr dependencies; + auto list_parser = ParserList{ - std::make_unique(), + std::make_unique( + /*table_name_with_optional_uuid_*/ true, /*allow_query_parameter_*/ false), std::make_unique(TokenType::Comma), - /* allow_empty= */ false}; + /*allow_empty*/ false}; if (!list_parser.parse(pos, dependencies, expected)) return false; refresh->set(refresh->dependencies, dependencies); diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.cpp b/src/Storages/MaterializedView/RefreshAllCombiner.cpp deleted file mode 100644 index 5cb06ade9c7..00000000000 --- a/src/Storages/MaterializedView/RefreshAllCombiner.cpp +++ /dev/null @@ -1,58 +0,0 @@ -#include - -#include - -namespace DB -{ - -RefreshAllCombiner::RefreshAllCombiner() - : time_arrived{false} -{} - -RefreshAllCombiner::RefreshAllCombiner(const std::vector & parents) - : time_arrived{false} -{ - parents_arrived.reserve(parents.size()); - for (auto && parent : parents) - parents_arrived.emplace(parent.uuid, false); -} - -bool RefreshAllCombiner::arriveTime() -{ - std::lock_guard lock(combiner_mutex); - time_arrived = true; - return allArrivedLocked(); -} - -bool RefreshAllCombiner::arriveParent(const StorageID & id) -{ - std::lock_guard lock(combiner_mutex); - parents_arrived[id.uuid] = true; - return allArrivedLocked(); -} - -void RefreshAllCombiner::flush() -{ - std::lock_guard lock(combiner_mutex); - flushLocked(); -} - -bool RefreshAllCombiner::allArrivedLocked() -{ - auto is_value = [](auto && key_value) { return key_value.second; }; - if (time_arrived && std::ranges::all_of(parents_arrived, is_value)) - { - flushLocked(); - return true; - } - return false; -} - -void RefreshAllCombiner::flushLocked() -{ - for (auto & [parent, 
arrived] : parents_arrived) - arrived = false; - time_arrived = false; -} - -} diff --git a/src/Storages/MaterializedView/RefreshAllCombiner.h b/src/Storages/MaterializedView/RefreshAllCombiner.h deleted file mode 100644 index f9f3a8d319c..00000000000 --- a/src/Storages/MaterializedView/RefreshAllCombiner.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/// Concurrent primitive for dependency completeness registration -/// When arrive methods return true, dependent task must be executed (or scheduled) -/// TODO: Doesn't need to be thread safe. -class RefreshAllCombiner -{ -public: - RefreshAllCombiner(); - - explicit RefreshAllCombiner(const std::vector & parents); - - bool arriveTime(); - - bool arriveParent(const StorageID & id); - - void flush(); - -private: - bool allArrivedLocked(); - - void flushLocked(); - - std::mutex combiner_mutex; - std::unordered_map parents_arrived; - bool time_arrived; -}; - -} diff --git a/src/Storages/MaterializedView/RefreshDependencies.cpp b/src/Storages/MaterializedView/RefreshDependencies.cpp deleted file mode 100644 index f1a834a6b3a..00000000000 --- a/src/Storages/MaterializedView/RefreshDependencies.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include - -#include - -namespace DB -{ - -RefreshDependencies::Entry::Entry(RefreshDependencies & deps, ContainerIter it) - : dependencies{&deps} - , entry_it{it} -{} - -RefreshDependencies::Entry::Entry(Entry && other) noexcept - : dependencies(std::exchange(other.dependencies, nullptr)) - , entry_it(std::move(other.entry_it)) -{} - -RefreshDependencies::Entry & RefreshDependencies::Entry::operator=(Entry && other) noexcept -{ - if (this == &other) - return *this; - cleanup(std::exchange(dependencies, std::exchange(other.dependencies, nullptr))); - entry_it = std::move(other.entry_it); - return *this; -} - -RefreshDependencies::Entry::~Entry() -{ - cleanup(dependencies); -} - -void RefreshDependencies::Entry::cleanup(RefreshDependencies * deps) -{ - if (deps) - deps->erase(entry_it); -} - -RefreshDependenciesEntry RefreshDependencies::add(RefreshTaskHolder dependency) -{ - std::lock_guard lock(dependencies_mutex); - return Entry(*this, dependencies.emplace(dependencies.end(), dependency)); -} - -void RefreshDependencies::notifyAll(const StorageID & id) -{ - std::lock_guard lock(dependencies_mutex); - for (auto && dep : dependencies) - { - if (auto task = dep.lock()) - task->notify(id); - } -} - -void RefreshDependencies::erase(ContainerIter it) -{ - std::lock_guard lock(dependencies_mutex); - dependencies.erase(it); -} - -} diff --git a/src/Storages/MaterializedView/RefreshDependencies.h b/src/Storages/MaterializedView/RefreshDependencies.h deleted file mode 100644 index 8d370f96d40..00000000000 --- a/src/Storages/MaterializedView/RefreshDependencies.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include - -#include - -#include - - -namespace DB -{ - -class RefreshTask; - -/// Concurrent primitive for managing list of dependent task and notifying them -class RefreshDependencies -{ - using Container = std::list; - using ContainerIter = typename Container::iterator; - -public: - class Entry - { - friend class RefreshDependencies; - - public: - Entry(Entry &&) noexcept; - Entry & operator=(Entry &&) noexcept; - - ~Entry(); - - private: - Entry(RefreshDependencies & deps, ContainerIter it); - - void cleanup(RefreshDependencies * deps); - - RefreshDependencies * dependencies; - ContainerIter entry_it; - }; - - RefreshDependencies() = default; - - Entry add(RefreshTaskHolder dependency); 
- - void notifyAll(const StorageID & id); - -private: - void erase(ContainerIter it); - - std::mutex dependencies_mutex; - std::list dependencies; -}; - -using RefreshDependenciesEntry = RefreshDependencies::Entry; - -} diff --git a/src/Storages/MaterializedView/RefreshSet.cpp b/src/Storages/MaterializedView/RefreshSet.cpp index 9efd82e1afc..bef628bc42b 100644 --- a/src/Storages/MaterializedView/RefreshSet.cpp +++ b/src/Storages/MaterializedView/RefreshSet.cpp @@ -3,15 +3,16 @@ namespace CurrentMetrics { - extern const Metric RefreshingViews; + extern const Metric RefreshableViews; } namespace DB { -RefreshSetElement::RefreshSetElement(StorageID id, RefreshTaskHolder task) +RefreshSetElement::RefreshSetElement(StorageID id, std::vector deps, RefreshTaskHolder task) : corresponding_task(task) , view_id(std::move(id)) + , dependencies(std::move(deps)) {} RefreshInfo RefreshSetElement::getInfo() const @@ -36,40 +37,21 @@ RefreshInfo RefreshSetElement::getInfo() const }; } -const StorageID & RefreshSetElement::getID() const -{ - return view_id; -} - RefreshTaskHolder RefreshSetElement::getTask() const { return corresponding_task.lock(); } -bool RefreshSetLess::operator()(const RefreshSetElement & l, const RefreshSetElement & r) const +const StorageID & RefreshSetElement::getID() const { - return l.getID().uuid < r.getID().uuid; + return view_id; } -bool RefreshSetLess::operator()(const StorageID & l, const RefreshSetElement & r) const +const std::vector & RefreshSetElement::getDependencies() const { - return l.uuid < r.getID().uuid; + return dependencies; } -bool RefreshSetLess::operator()(const RefreshSetElement & l, const StorageID & r) const -{ - return l.getID().uuid < r.uuid; -} - -bool RefreshSetLess::operator()(const StorageID & l, const StorageID & r) const -{ - return l.uuid < r.uuid; -} - -RefreshSet::Entry::Entry() - : parent_set{nullptr} -{} - RefreshSet::Entry::Entry(Entry && other) noexcept : parent_set{std::exchange(other.parent_set, nullptr)} , iter(std::move(other.iter)) @@ -80,7 +62,8 @@ RefreshSet::Entry & RefreshSet::Entry::operator=(Entry && other) noexcept { if (this == &other) return *this; - cleanup(std::exchange(parent_set, std::exchange(other.parent_set, nullptr))); + reset(); + parent_set = std::exchange(other.parent_set, nullptr); iter = std::move(other.iter); metric_increment = std::move(other.metric_increment); return *this; @@ -88,34 +71,51 @@ RefreshSet::Entry & RefreshSet::Entry::operator=(Entry && other) noexcept RefreshSet::Entry::~Entry() { - cleanup(parent_set); + reset(); } -RefreshSet::Entry::Entry(RefreshSet & set, ContainerIter it, const CurrentMetrics::Metric & metric) - : parent_set{&set}, iter(std::move(it)), metric_increment(metric) +RefreshSet::Entry::Entry(RefreshSet & set, ElementMapIter it) + : parent_set{&set}, iter(std::move(it)), metric_increment(CurrentMetrics::RefreshableViews) {} -void RefreshSet::Entry::cleanup(RefreshSet * set) +void RefreshSet::Entry::reset() { - if (set) - set->erase(iter); + if (!parent_set) + return; + std::exchange(parent_set, nullptr)->erase(iter); + metric_increment.reset(); } -RefreshSet::RefreshSet() - : set_metric(CurrentMetrics::RefreshingViews) -{} +RefreshSet::RefreshSet() {} + +RefreshSet::Entry RefreshSet::emplace(StorageID id, std::vector dependencies, RefreshTaskHolder task) +{ + std::lock_guard guard(mutex); + auto [it, is_inserted] = elements.emplace(std::piecewise_construct, std::forward_as_tuple(id), std::forward_as_tuple(id, dependencies, std::move(task))); + if (!is_inserted) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Refresh set entry already exists for table {}", id.getFullTableName()); + + for (const StorageID & dep : dependencies) + { + auto [unused, dep_inserted] = dependents[dep].insert(id); + if (!dep_inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Refresh set entry already contains dependency of {} on {}", id.getFullTableName(), dep.getFullTableName()); + } + + return Entry(*this, std::move(it)); +} RefreshTaskHolder RefreshSet::getTask(const StorageID & id) const { - std::lock_guard lock(elements_mutex); - if (auto element = elements.find(id.uuid); element != elements.end()) + std::lock_guard lock(mutex); + if (auto element = elements.find(id); element != elements.end()) return element->second.getTask(); return nullptr; } RefreshSet::InfoContainer RefreshSet::getInfo() const { - std::lock_guard lock(elements_mutex); + std::lock_guard lock(mutex); InfoContainer res; res.reserve(elements.size()); for (auto && element : elements) @@ -123,9 +123,29 @@ RefreshSet::InfoContainer RefreshSet::getInfo() const return res; } -void RefreshSet::erase(ContainerIter it) +std::vector RefreshSet::getDependents(const StorageID & id) const { - std::lock_guard lock(elements_mutex); + std::lock_guard lock(mutex); + std::vector res; + auto it = dependents.find(id); + if (it == dependents.end()) + return {}; + for (auto & dep_id : it->second) + if (auto element = elements.find(dep_id); element != elements.end()) + res.push_back(element->second.getTask()); + return res; +} + +void RefreshSet::erase(ElementMapIter it) +{ + std::lock_guard lock(mutex); + for (const StorageID & dep : it->second.getDependencies()) + { + auto & set = dependents[dep]; + set.erase(it->second.getID()); + if (set.empty()) + dependents.erase(dep); + } elements.erase(it); } diff --git a/src/Storages/MaterializedView/RefreshSet.h b/src/Storages/MaterializedView/RefreshSet.h index cc5b0006218..b2685d67883 100644 --- a/src/Storages/MaterializedView/RefreshSet.h +++ b/src/Storages/MaterializedView/RefreshSet.h @@ -9,6 +9,8 @@ namespace DB { +using DatabaseAndTableNameSet = std::unordered_set; + struct RefreshInfo { String database; @@ -33,7 +35,7 @@ class RefreshSetElement { friend class RefreshTask; public: - RefreshSetElement(StorageID id, RefreshTaskHolder task); + RefreshSetElement(StorageID id, std::vector deps, RefreshTaskHolder task); RefreshSetElement(const RefreshSetElement &) = delete; RefreshSetElement & operator=(const RefreshSetElement &) = delete; @@ -41,12 +43,13 @@ public: RefreshInfo getInfo() const; RefreshTaskHolder getTask() const; - const StorageID & getID() const; + const std::vector & getDependencies() const; private: RefreshTaskObserver corresponding_task; StorageID view_id; + std::vector dependencies; std::atomic read_rows{0}; std::atomic read_bytes{0}; @@ -63,73 +66,62 @@ private: std::atomic last_result{0}; }; -struct RefreshSetLess -{ - using is_transparent = std::true_type; - - bool operator()(const RefreshSetElement & l, const RefreshSetElement & r) const; - bool operator()(const StorageID & l, const RefreshSetElement & r) const; - bool operator()(const RefreshSetElement & l, const StorageID & r) const; - bool operator()(const StorageID & l, const StorageID & r) const; -}; - /// Set of refreshable views class RefreshSet { private: - using Container = std::map; - using ContainerIter = typename Container::iterator; + using ElementMap = std::unordered_map; + using ElementMapIter = typename ElementMap::iterator; public: class Entry { friend class RefreshSet; public: - Entry(); + 
Entry() = default; Entry(Entry &&) noexcept; Entry & operator=(Entry &&) noexcept; ~Entry(); - RefreshSetElement * operator->() { return &iter->second; } + explicit operator bool() const { return parent_set != nullptr; } + RefreshSetElement * operator->() { chassert(parent_set); return &iter->second; } + + void reset(); private: - RefreshSet * parent_set; - ContainerIter iter; + RefreshSet * parent_set = nullptr; + ElementMapIter iter; std::optional metric_increment; - Entry( - RefreshSet & set, - ContainerIter it, - const CurrentMetrics::Metric & metric); - - void cleanup(RefreshSet * set); + Entry(RefreshSet & set, ElementMapIter it); }; using InfoContainer = std::vector; RefreshSet(); - std::optional emplace(StorageID id, RefreshTaskHolder task) - { - std::lock_guard guard(elements_mutex); - auto [it, is_inserted] = elements.emplace(std::piecewise_construct, std::forward_as_tuple(id.uuid), std::forward_as_tuple(id, std::move(task))); - if (is_inserted) - return Entry(*this, std::move(it), set_metric); - return {}; - } + Entry emplace(StorageID id, std::vector dependencies, RefreshTaskHolder task); RefreshTaskHolder getTask(const StorageID & id) const; InfoContainer getInfo() const; -private: - mutable std::mutex elements_mutex; - Container elements; - CurrentMetrics::Metric set_metric; + /// Get tasks that depend on the given one. + std::vector getDependents(const StorageID & id) const; - void erase(ContainerIter it); +private: + using DependentsMap = std::unordered_map; + + /// Protects the two maps below, not locked for any nontrivial operations (e.g. operations that + /// block or lock other mutexes). + mutable std::mutex mutex; + + ElementMap elements; + DependentsMap dependents; + + void erase(ElementMapIter it); }; using RefreshSetEntry = RefreshSet::Entry; diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 0a85f533a27..000ee7aa1bd 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -2,20 +2,21 @@ #include +#include #include #include #include #include #include +namespace CurrentMetrics +{ + extern const Metric RefreshingViews; +} + namespace DB { -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - namespace { @@ -27,26 +28,12 @@ std::uniform_int_distribution makeSpreadDistribution(const ASTTimePeriod return std::uniform_int_distribution(-limit, limit); } -std::variant makeRefreshTimer(const ASTRefreshStrategy & strategy) -{ - using enum ASTRefreshStrategy::ScheduleKind; - switch (strategy.schedule_kind) - { - case EVERY: - return RefreshEveryTimer{*strategy.period, strategy.interval}; - case AFTER: - return RefreshAfterTimer{strategy.interval}; - default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown refresh strategy kind"); - } -} - } RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) : log(&Poco::Logger::get("RefreshTask")) - , refresh_timer(makeRefreshTimer(strategy)) + , refresh_timer(strategy) , refresh_spread{makeSpreadDistribution(strategy.spread)} {} @@ -63,28 +50,13 @@ RefreshTaskHolder RefreshTask::create( if (auto t = self.lock()) t->refreshTask(); }); - task->set_entry = context->getRefreshSet().emplace(view.getStorageID(), task).value(); + + std::vector deps; if (strategy.dependencies) - { - if (strategy.schedule_kind != ASTRefreshStrategy::ScheduleKind::AFTER) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dependencies are allowed only for AFTER refresh kind"); - - 
task->deps_entries.reserve(strategy.dependencies->children.size()); for (auto && dependency : strategy.dependencies->children) - { - StorageID dep_id(dependency->as()); - /// TODO: - /// * This depends on the order in which different tables are initialized. - /// Is the order guaranteed on startup? - /// * At what point does the table name from the query get mapped to the table's UUID? - /// Does it work at all? Is it reliable? - /// * Don't silently ignore if the table is missing. - if (auto dep_task = context->getRefreshSet().getTask(dep_id)) - task->deps_entries.push_back(dep_task->dependencies.add(task)); - } + deps.emplace_back(dependency->as()); - /// TODO: Initialize combiner. - } + task->set_entry = context->getRefreshSet().emplace(view.getStorageID(), deps, task); return task; } @@ -93,6 +65,7 @@ void RefreshTask::initializeAndStart(std::shared_ptr vi { view_to_refresh = view; /// TODO: Add a setting to stop views on startup, set `stop_requested = true` in that case. + populateDependencies(); calculateNextRefreshTime(std::chrono::system_clock::now()); refresh_task->schedule(); } @@ -148,14 +121,78 @@ void RefreshTask::resume() refresh_task->schedule(); } -void RefreshTask::notify(const StorageID & parent_id) +void RefreshTask::shutdown() +{ + { + std::lock_guard guard(mutex); + stop_requested = true; + interrupt_execution.store(true); + } + + /// Wait for the task to return and prevent it from being scheduled in future. + refresh_task->deactivate(); + + /// Remove from RefreshSet on DROP, without waiting for the IStorage to be destroyed. + /// This matters because a table may get dropped and immediately created again with the same name, + /// while the old table's IStorage still exists (pinned by ongoing queries). + std::lock_guard guard(mutex); + set_entry.reset(); +} + +void RefreshTask::notify(const StorageID & parent_id, std::chrono::system_clock::time_point scheduled_time_without_spread, const RefreshTimer & parent_timer) { std::lock_guard guard(mutex); - if (!combiner.arriveParent(parent_id)) + if (!set_entry) + return; // we've shut down + + /// In the general case, it's not clear what the meaning of dependencies should be. + /// E.g. what behavior would the user want/expect in the following cases?: + /// * REFRESH EVERY 3 HOUR depends on REFRESH EVERY 2 HOUR + /// * REFRESH AFTER 3 HOUR depends on REFRESH AFTER 2 HOUR + /// * REFRESH AFTER 3 HOUR depends on REFRESH EVERY 1 DAY + /// I don't know. + /// + /// Cases that are important to support well include: + /// (1) REFRESH EVERY 1 DAY depends on REFRESH EVERY 1 DAY + /// Here the second refresh should start only after the first refresh completed *for the same day*. + /// Yesterday's refresh of the dependency shouldn't trigger today's refresh of the dependent, + /// even if it completed today. + /// (2) REFRESH EVERY 1 DAY OFFSET 2 HOUR depends on REFRESH EVERY 1 DAY OFFSET 1 HOUR + /// (3) REFRESH EVERY 1 DAY OFFSET 1 HOUR depends on REFRESH EVERY 1 DAY OFFSET 23 HOUR + /// Here the dependency's refresh on day X should trigger dependent's refresh on day X+1. + /// (4) REFRESH EVERY 2 HOUR depends on REFRESH EVERY 1 HOUR + /// The 2 HOUR refresh should happen after the 1 HOUR refresh for every other hour, e.g. + /// after the 2pm refresh, then after the 4pm refresh, etc. + /// (5) REFRESH AFTER 1 HOUR depends on REFRESH AFTER 1 HOUR + /// Here the two views should try to synchronize their schedules instead of arbitrarily drifting + /// apart. 
In particular, consider the case where the dependency refreshes slightly faster than + /// the dependent. If we don't do anything special, the DEPENDS ON will have pretty much no effect. + /// To apply some synchronization pressure, we reduce the dependent's delay by some percentage + /// after the dependent completed. + /// (6) REFRESH AFTER 1 HOUR depends on REFRESH AFTER 2 HOUR + /// REFRESH EVERY 1 HOUR depends on REFRESH EVERY 2 HOUR + /// Not sure about these. Currently we just make the dependent refresh at the same rate as + /// the dependency, i.e. the 1 HOUR table will actually be refreshed every 2 hours. + + /// Only accept the dependency's refresh if its next refresh time is after ours. + /// This takes care of cases (1)-(4), and seems harmless in all other cases. + /// Might be mildly helpful in weird cases like REFRESH AFTER 3 HOUR depends on REFRESH AFTER 2 HOUR. + if (parent_timer.next(scheduled_time_without_spread) <= next_refresh_without_spread) return; - if (std::exchange(refresh_immediately, true)) - return; - refresh_task->schedule(); + + if (arriveDependency(parent_id) && !std::exchange(refresh_immediately, true)) + refresh_task->schedule(); + + /// Decrease delay in case (5). + /// Maybe we should do it for all AFTER-AFTER dependencies, even if periods are different. + if (refresh_timer == parent_timer && refresh_timer.tryGetAfter()) + { + /// TODO: Implement this: + /// * Add setting max_after_delay_adjustment_pct + /// * Decrease both next_refresh_without_spread and next_refresh_with_spread, + /// but only if they haven't already been decreased this way during current period + /// * refresh_task->schedule() + } } void RefreshTask::refreshTask() @@ -205,7 +242,7 @@ void RefreshTask::refreshTask() auto now = std::chrono::system_clock::now(); if (now >= next_refresh_with_spread) { - if (combiner.arriveTime()) + if (arriveTime()) refresh_immediately = true; else { @@ -239,6 +276,9 @@ void RefreshTask::refreshTask() reportState(RefreshState::Running); + CurrentMetrics::Increment metric_inc(CurrentMetrics::RefreshingViews); + auto scheduled_time_without_spread = next_refresh_without_spread; + lock.unlock(); bool finished = false; @@ -251,7 +291,7 @@ void RefreshTask::refreshTask() finished = executeRefresh(); if (finished) - completeRefresh(view, LastTaskResult::Finished); + completeRefresh(view, LastTaskResult::Finished, scheduled_time_without_spread); } catch (...) { @@ -311,12 +351,17 @@ bool RefreshTask::executeRefresh() return !not_finished; } -void RefreshTask::completeRefresh(std::shared_ptr view, LastTaskResult result) +void RefreshTask::completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::system_clock::time_point scheduled_time_without_spread) { auto stale_table = view->exchangeTargetTable(refresh_query->table_id); - dependencies.notifyAll(view->getStorageID()); - auto drop_context = Context::createCopy(view->getContext()); + auto context = view->getContext(); + StorageID my_id = set_entry->getID(); + auto dependents = context->getRefreshSet().getDependents(my_id); + for (const RefreshTaskHolder & dep_task : dependents) + dep_task->notify(my_id, scheduled_time_without_spread, refresh_timer); + + auto drop_context = Context::createCopy(context); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, stale_table, /*sync=*/true); cleanState(); @@ -351,16 +396,6 @@ void RefreshTask::cleanState() refresh_query.reset(); } -namespace -{ - -template -struct CombinedVisitor : Ts... 
{ using Ts::operator()...; }; -template -CombinedVisitor(Ts...) -> CombinedVisitor; - -} - void RefreshTask::calculateNextRefreshTime(std::chrono::system_clock::time_point now) { /// TODO: Add a setting to randomize initial delay in case of AFTER, for the case when the server @@ -368,21 +403,11 @@ void RefreshTask::calculateNextRefreshTime(std::chrono::system_clock::time_point /// TODO: Maybe do something like skip_update_after_seconds and skip_update_after_ratio. /// Unclear if that's useful at all if the last refresh timestamp is not remembered across restarts. - auto advance = [&](std::chrono::system_clock::time_point t) - { - CombinedVisitor refresh_time_visitor{ - [t](const RefreshAfterTimer & timer) { return timer.after(t); }, - [t](const RefreshEveryTimer & timer) { return timer.next(t); }}; - auto r = std::visit(std::move(refresh_time_visitor), refresh_timer); - chassert(r > t); - return r; - }; - /// It's important to use time without spread here, otherwise we would do multiple refreshes instead /// of one, if the generated spread is negative and the first refresh completes faster than the spread. - std::chrono::sys_seconds next = advance(next_refresh_without_spread); + std::chrono::sys_seconds next = refresh_timer.next(next_refresh_without_spread); if (next < now) - next = advance(now); // fell behind, skip to current time + next = refresh_timer.next(now); // fell behind, skip to current time next_refresh_without_spread = next; next_refresh_with_spread = next + std::chrono::seconds{refresh_spread(thread_local_rng)}; @@ -390,6 +415,32 @@ void RefreshTask::calculateNextRefreshTime(std::chrono::system_clock::time_point reportNextRefreshTime(next_refresh_with_spread); } +bool RefreshTask::arriveDependency(const StorageID & parent_table_or_timer) +{ + remaining_dependencies.erase(parent_table_or_timer); + if (!remaining_dependencies.empty() || !time_arrived) + return false; + populateDependencies(); + return true; +} + +bool RefreshTask::arriveTime() +{ + time_arrived = true; + if (!remaining_dependencies.empty() || !time_arrived) + return false; + populateDependencies(); + return true; +} + +void RefreshTask::populateDependencies() +{ + chassert(remaining_dependencies.empty()); + auto deps = set_entry->getDependencies(); + remaining_dependencies.insert(deps.begin(), deps.end()); + time_arrived = false; +} + std::shared_ptr RefreshTask::lockView() { return std::static_pointer_cast(view_to_refresh.lock()); diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h index 192a4776be0..cdb0d22342e 100644 --- a/src/Storages/MaterializedView/RefreshTask.h +++ b/src/Storages/MaterializedView/RefreshTask.h @@ -1,7 +1,5 @@ #pragma once -#include -#include #include #include #include @@ -70,8 +68,11 @@ public: /// Resume task execution void resume(); + /// Permanently disable task scheduling and remove this table from RefreshSet. + void shutdown(); + /// Notify dependent task - void notify(const StorageID & parent_id); + void notify(const StorageID & parent_id, std::chrono::system_clock::time_point scheduled_time_without_spread, const RefreshTimer & parent_timer); private: Poco::Logger * log = nullptr; @@ -79,7 +80,7 @@ private: RefreshSet::Entry set_entry; /// Refresh schedule - std::variant refresh_timer; + RefreshTimer refresh_timer; std::uniform_int_distribution refresh_spread; /// Task execution. Non-empty iff a refresh is in progress (possibly paused). 
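The arriveDependency()/arriveTime()/populateDependencies() trio above is a small barrier: a refresh fires only once the scheduled time has arrived and every DEPENDS ON table has completed a refresh this round, and firing re-arms both conditions for the next round. A simplified single-threaded model of the same logic (std::string stands in for StorageID; no locking, since in the patch these methods already run under RefreshTask's mutex):

#include <set>
#include <string>

struct ArrivalBarrier
{
    std::set<std::string> all_deps;   /// configured DEPENDS ON list
    std::set<std::string> remaining;  /// dependencies that haven't refreshed this round
    bool time_arrived = false;

    /// A dependency completed a refresh. Returns true if the view should refresh now.
    bool arriveDependency(const std::string & dep)
    {
        remaining.erase(dep);
        return maybeFire();
    }

    /// The scheduled refresh time came. Returns true if the view should refresh now.
    bool arriveTime()
    {
        time_arrived = true;
        return maybeFire();
    }

private:
    bool maybeFire()
    {
        if (!remaining.empty() || !time_arrived)
            return false;
        remaining = all_deps;  /// re-arm: the next round needs fresh completions again
        time_arrived = false;
        return true;
    }
};

The guard in notify() — accept a dependency's completion only if the dependency's next scheduled refresh lands after ours — is what makes case (1) of the comment above work: yesterday's run of the dependency cannot satisfy today's round.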
@@ -88,10 +89,9 @@ private: std::optional refresh_block; std::shared_ptr refresh_query; - /// Concurrent dependency management - RefreshAllCombiner combiner; - RefreshDependencies dependencies; - std::vector deps_entries; + /// StorageIDs of our dependencies that we're waiting for. + DatabaseAndTableNameSet remaining_dependencies; + bool time_arrived = false; /// Protects all fields below (they're accessed by both refreshTask() and public methods). /// Never locked for blocking operations (e.g. creating or dropping the internal table). @@ -132,13 +132,18 @@ private: /// Methods that do the actual work: creating/dropping internal table, executing the query. void initializeRefresh(std::shared_ptr view); bool executeRefresh(); - void completeRefresh(std::shared_ptr view, LastTaskResult result); + void completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::system_clock::time_point scheduled_time_without_spread); void cancelRefresh(LastTaskResult result); void cleanState(); /// Assigns next_refresh_* void calculateNextRefreshTime(std::chrono::system_clock::time_point now); + /// Returns true if all dependencies are fulfilled now. Refills remaining_dependencies in this case. + bool arriveDependency(const StorageID & parent_table_or_timer); + bool arriveTime(); + void populateDependencies(); + std::shared_ptr lockView(); /// Methods that push information to RefreshSet, for observability. diff --git a/src/Storages/MaterializedView/RefreshTimers.cpp b/src/Storages/MaterializedView/RefreshTimers.cpp index ebef561fc29..0331bad82c3 100644 --- a/src/Storages/MaterializedView/RefreshTimers.cpp +++ b/src/Storages/MaterializedView/RefreshTimers.cpp @@ -1,10 +1,15 @@ #include -#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + namespace { constexpr std::chrono::days ZERO_DAYS{0}; @@ -68,6 +73,14 @@ void RefreshAfterTimer::setWithKind(IntervalKind kind, UInt64 val) } } +bool RefreshAfterTimer::operator==(const RefreshAfterTimer & rhs) const +{ + /// (Or maybe different implementations of standard library have different sizes of chrono types. + /// If so, feel free to just remove this assert.) + static_assert(sizeof(*this) == 40, "RefreshAfterTimer fields appear to have changed. Please update this operator==() here."); + return std::tie(seconds, minutes, hours, days, weeks, months, years) == std::tie(rhs.seconds, rhs.minutes, rhs.hours, rhs.days, rhs.weeks, rhs.months, rhs.years); +} + RefreshEveryTimer::RefreshEveryTimer(const ASTTimePeriod & time_period, const ASTTimeInterval * time_offset) : offset(time_offset) , value{static_cast(time_period.value)} @@ -240,4 +253,50 @@ std::chrono::sys_seconds RefreshEveryTimer::alignedToSeconds(std::chrono::system return tp_minutes + next_seconds; } +bool RefreshEveryTimer::operator==(const RefreshEveryTimer & rhs) const +{ + static_assert(sizeof(*this) == sizeof(offset) + 8, "RefreshEveryTimer fields appear to have changed. 
Please update this operator==() here."); + return std::tie(offset, value, kind) == std::tie(rhs.offset, rhs.value, rhs.kind); +} + +std::variant makeTimer(const ASTRefreshStrategy & strategy) +{ + using enum ASTRefreshStrategy::ScheduleKind; + switch (strategy.schedule_kind) + { + case EVERY: + return RefreshEveryTimer{*strategy.period, strategy.interval}; + case AFTER: + return RefreshAfterTimer{strategy.interval}; + default: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown refresh strategy kind"); + } +} + +RefreshTimer::RefreshTimer(const ASTRefreshStrategy & strategy) : timer(makeTimer(strategy)) {} + +namespace +{ + +template +struct CombinedVisitor : Ts... { using Ts::operator()...; }; +template +CombinedVisitor(Ts...) -> CombinedVisitor; + +} + +std::chrono::sys_seconds RefreshTimer::next(std::chrono::system_clock::time_point tp) const +{ + CombinedVisitor visitor{ + [tp](const RefreshAfterTimer & timer_) { return timer_.after(tp); }, + [tp](const RefreshEveryTimer & timer_) { return timer_.next(tp); }}; + auto r = std::visit(std::move(visitor), timer); + chassert(r > tp); + return r; +} + +bool RefreshTimer::operator==(const RefreshTimer & rhs) const { return timer == rhs.timer; } +const RefreshAfterTimer * RefreshTimer::tryGetAfter() const { return std::get_if(&timer); } +const RefreshEveryTimer * RefreshTimer::tryGetEvery() const { return std::get_if(&timer); } + } diff --git a/src/Storages/MaterializedView/RefreshTimers.h b/src/Storages/MaterializedView/RefreshTimers.h index 0672782a3f9..4625e8cd344 100644 --- a/src/Storages/MaterializedView/RefreshTimers.h +++ b/src/Storages/MaterializedView/RefreshTimers.h @@ -9,6 +9,7 @@ namespace DB class ASTTimeInterval; class ASTTimePeriod; +class ASTRefreshStrategy; /// Schedule timer for MATERIALIZED VIEW ... REFRESH AFTER ... 
queries class RefreshAfterTimer @@ -26,6 +27,8 @@ public: std::chrono::months getMonths() const { return months; } std::chrono::years getYears() const { return years; } + bool operator==(const RefreshAfterTimer & rhs) const; + private: void setWithKind(IntervalKind kind, UInt64 val); @@ -46,6 +49,8 @@ public: std::chrono::sys_seconds next(std::chrono::system_clock::time_point tp) const; + bool operator==(const RefreshEveryTimer & rhs) const; + private: std::chrono::sys_seconds alignedToYears(std::chrono::system_clock::time_point tp) const; @@ -66,4 +71,18 @@ private: IntervalKind kind{IntervalKind::Second}; }; +struct RefreshTimer +{ + std::variant timer; + + explicit RefreshTimer(const ASTRefreshStrategy & strategy); + + std::chrono::sys_seconds next(std::chrono::system_clock::time_point tp) const; + + bool operator==(const RefreshTimer & rhs) const; + + const RefreshAfterTimer * tryGetAfter() const; + const RefreshEveryTimer * tryGetEvery() const; +}; + } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 4f2ffb38017..6504bfa313b 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -451,7 +451,7 @@ void StorageMaterializedView::startup() void StorageMaterializedView::shutdown(bool) { if (refresher) - refresher->stop(); + refresher->shutdown(); auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & select_query = metadata_snapshot->getSelectQuery(); From a7c369e14f61566b2919f04e09ebe2f8cbe4954f Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Sat, 25 Nov 2023 03:00:32 +0000 Subject: [PATCH 65/88] Overhaul timestamp arithmetic --- src/Common/CalendarTimeInterval.cpp | 139 ++++++++ src/Common/CalendarTimeInterval.h | 64 ++++ src/Parsers/ASTRefreshStrategy.cpp | 14 +- src/Parsers/ASTRefreshStrategy.h | 23 +- src/Parsers/ASTTimeInterval.cpp | 14 +- src/Parsers/ASTTimeInterval.h | 17 +- src/Parsers/ParserRefreshStrategy.cpp | 31 +- src/Parsers/ParserTimeInterval.cpp | 76 ++--- src/Parsers/ParserTimeInterval.h | 20 +- .../MaterializedView/RefreshSchedule.cpp | 57 ++++ .../MaterializedView/RefreshSchedule.h | 28 ++ src/Storages/MaterializedView/RefreshTask.cpp | 64 ++-- src/Storages/MaterializedView/RefreshTask.h | 29 +- .../MaterializedView/RefreshTimers.cpp | 302 ------------------ src/Storages/MaterializedView/RefreshTimers.h | 88 ----- 15 files changed, 411 insertions(+), 555 deletions(-) create mode 100644 src/Common/CalendarTimeInterval.cpp create mode 100644 src/Common/CalendarTimeInterval.h create mode 100644 src/Storages/MaterializedView/RefreshSchedule.cpp create mode 100644 src/Storages/MaterializedView/RefreshSchedule.h delete mode 100644 src/Storages/MaterializedView/RefreshTimers.cpp delete mode 100644 src/Storages/MaterializedView/RefreshTimers.h diff --git a/src/Common/CalendarTimeInterval.cpp b/src/Common/CalendarTimeInterval.cpp new file mode 100644 index 00000000000..bcedf63b3ff --- /dev/null +++ b/src/Common/CalendarTimeInterval.cpp @@ -0,0 +1,139 @@ +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +CalendarTimeInterval::CalendarTimeInterval(const CalendarTimeInterval::Intervals & intervals) +{ + for (auto [kind, val] : intervals) + { + switch (kind.kind) + { + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sub-second intervals are not supported here"); + + case IntervalKind::Second: + case 
IntervalKind::Minute: + case IntervalKind::Hour: + case IntervalKind::Day: + case IntervalKind::Week: + seconds += val * kind.toAvgSeconds(); + break; + + case IntervalKind::Month: + months += val; + break; + case IntervalKind::Quarter: + months += val * 3; + break; + case IntervalKind::Year: + months += val * 12; + break; + } + } +} + +CalendarTimeInterval::Intervals CalendarTimeInterval::toIntervals() const +{ + Intervals res; + auto greedy = [&](UInt64 x, std::initializer_list> kinds) + { + for (auto [kind, count] : kinds) + { + UInt64 k = x / count; + if (k == 0) + continue; + x -= k * count; + res.emplace_back(kind, k); + } + chassert(x == 0); + }; + greedy(months, {{IntervalKind::Year, 12}, {IntervalKind::Month, 1}}); + greedy(seconds, {{IntervalKind::Week, 3600*24*7}, {IntervalKind::Day, 3600*24}, {IntervalKind::Hour, 3600}, {IntervalKind::Minute, 60}, {IntervalKind::Second, 1}}); + return res; +} + +UInt64 CalendarTimeInterval::minSeconds() const +{ + return 3600*24 * (365 * months/12 + 28 * months%12) + seconds; +} + +UInt64 CalendarTimeInterval::maxSeconds() const +{ + return 3600*24 * (366 * months/12 + 31 * months%12) + seconds; +} + +void CalendarTimeInterval::assertSingleUnit() const +{ + if (seconds && months) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval shouldn't contain both calendar units and clock units (e.g. months and days)"); +} + +void CalendarTimeInterval::assertPositive() const +{ + if (!seconds && !months) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval must be positive"); +} + +/// Number of whole months between 1970-01-01 and `t`. +static Int64 toAbsoluteMonth(std::chrono::system_clock::time_point t) +{ + std::chrono::year_month_day ymd(std::chrono::floor(t)); + return (Int64(int(ymd.year())) - 1970) * 12 + Int64(unsigned(ymd.month()) - 1); +} + +static std::chrono::sys_seconds startOfAbsoluteMonth(Int64 absolute_month) +{ + Int64 year = absolute_month >= 0 ? absolute_month/12 : -((-absolute_month+11)/12); + Int64 month = absolute_month - year*12; + chassert(month >= 0 && month < 12); + std::chrono::year_month_day ymd( + std::chrono::year(int(year + 1970)), + std::chrono::month(unsigned(month + 1)), + std::chrono::day(1)); + return std::chrono::sys_days(ymd); +} + +std::chrono::sys_seconds CalendarTimeInterval::advance(std::chrono::system_clock::time_point tp) const +{ + auto t = std::chrono::sys_seconds(std::chrono::floor(tp)); + if (months) + { + auto m = toAbsoluteMonth(t); + auto s = t - startOfAbsoluteMonth(m); + t = startOfAbsoluteMonth(m + Int64(months)) + s; + } + return t + std::chrono::seconds(Int64(seconds)); +} + +std::chrono::sys_seconds CalendarTimeInterval::floor(std::chrono::system_clock::time_point tp) const +{ + assertSingleUnit(); + assertPositive(); + + if (months) + return startOfAbsoluteMonth(toAbsoluteMonth(tp) / months * months); + else + { + constexpr std::chrono::seconds epoch(-3600*24*3); + auto t = std::chrono::sys_seconds(std::chrono::floor(tp)); + /// We want to align with weeks, but 1970-01-01 is a Thursday, so align with 1969-12-29 instead. 
+        return std::chrono::sys_seconds((t.time_since_epoch() - epoch) / seconds * seconds + epoch);
+    }
+}
+
+bool CalendarTimeInterval::operator==(const CalendarTimeInterval & rhs) const
+{
+    return std::tie(months, seconds) == std::tie(rhs.months, rhs.seconds);
+}
+
+}
diff --git a/src/Common/CalendarTimeInterval.h b/src/Common/CalendarTimeInterval.h
new file mode 100644
index 00000000000..40a390736bc
--- /dev/null
+++ b/src/Common/CalendarTimeInterval.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+/// Represents a duration of calendar time, e.g.:
+///  * 1 week + 5 minutes + 21 seconds (aka 605121 seconds),
+///  * 1 (calendar) month - not equivalent to any number of seconds!
+///  * 3 years + 1 week (aka 36 months + 604800 seconds).
+///
+/// Be careful with calendar arithmetic: it's missing many familiar properties of numbers.
+/// E.g. x + y - y is not always equal to x (October 31 + 1 month - 1 month = November 1).
+struct CalendarTimeInterval
+{
+    UInt64 seconds = 0;
+    UInt64 months = 0;
+
+    using Intervals = std::vector<std::pair<IntervalKind, UInt64>>;
+
+    CalendarTimeInterval() = default;
+
+    /// Year, Quarter, Month are converted to months.
+    /// Week, Day, Hour, Minute, Second are converted to seconds.
+    /// Millisecond, Microsecond, Nanosecond throw exception.
+    explicit CalendarTimeInterval(const Intervals & intervals);
+
+    /// E.g. for {36 months, 604801 seconds} returns {3 years, 1 week, 1 second}.
+    Intervals toIntervals() const;
+
+    /// Approximate shortest and longest duration in seconds. E.g. a month is [28, 31] days.
+    UInt64 minSeconds() const;
+    UInt64 maxSeconds() const;
+
+    /// Checks that the interval has only months or only seconds, throws otherwise.
+    void assertSingleUnit() const;
+    void assertPositive() const;
+
+    /// Add this interval to the timestamp. First months, then seconds.
+    /// Gets weird near month boundaries: October 31 + 1 month = December 1.
+    /// Gets weird with leap years: 2004-03-15 + 1 year = 2005-03-16,
+    /// 2004-12-31 + 1 year = 2006-01-01,
+    std::chrono::sys_seconds advance(std::chrono::system_clock::time_point t) const;
+
+    /// Rounds the timestamp down to the nearest timestamp "aligned" with this interval.
+    /// The interval must satisfy assertSingleUnit() and assertPositive().
+    ///  * For months, rounds to the start of a month whose absolute index is divisible by `months`.
+    ///    The month index is 0-based starting from January 1970.
+    ///    E.g. if the interval is 1 month, rounds down to the start of the month.
+    ///  * For seconds, rounds to a timestamp x such that (x - December 29 1969 (Monday)) is divisible
+    ///    by this interval.
+    ///    E.g. if the interval is 1 week, rounds down to the start of the week.
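To make the calendar arithmetic above concrete, a small usage sketch (it assumes only the CalendarTimeInterval API declared above; the dates in the comments follow from the advance()/floor() definitions in this patch):

    #include <Common/CalendarTimeInterval.h>
    #include <Common/IntervalKind.h>
    #include <chrono>
    #include <iostream>

    int main()
    {
        using namespace DB;

        /// One calendar month: stored as months, never converted to seconds.
        CalendarTimeInterval month({{IntervalKind::Month, 1}});
        auto oct31 = std::chrono::sys_days(std::chrono::year(2023) / 10 / 31);
        /// advance() adds months first, keeping the offset from the month start,
        /// so October 31 (month start + 30 days) becomes November 1 + 30 days:
        auto dec1 = month.advance(oct31); /// 2023-12-01 00:00:00

        /// One week: stored as 604800 seconds, aligned to Mondays by floor().
        CalendarTimeInterval week({{IntervalKind::Week, 1}});
        auto wed = std::chrono::sys_days(std::chrono::year(2024) / 1 / 3); /// a Wednesday
        auto mon = week.floor(wed); /// 2024-01-01 00:00:00, the preceding Monday

        std::cout << dec1.time_since_epoch().count() << ' '
                  << mon.time_since_epoch().count() << '\n';
    }

The week case works because the epoch used by floor() is 1969-12-29, three days before the Thursday 1970-01-01, so multiples of 604800 seconds from it always land on Monday midnight.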
+ /// + /// Guarantees: + /// * advance(floor(x)) > x + /// * floor(advance(floor(x))) = advance(floor(x)) + std::chrono::sys_seconds floor(std::chrono::system_clock::time_point t) const; + + bool operator==(const CalendarTimeInterval & rhs) const; +}; + +} diff --git a/src/Parsers/ASTRefreshStrategy.cpp b/src/Parsers/ASTRefreshStrategy.cpp index ed8216cf4aa..2e0c6ee4638 100644 --- a/src/Parsers/ASTRefreshStrategy.cpp +++ b/src/Parsers/ASTRefreshStrategy.cpp @@ -10,12 +10,10 @@ ASTPtr ASTRefreshStrategy::clone() const auto res = std::make_shared(*this); res->children.clear(); - if (interval) - res->set(res->interval, interval->clone()); if (period) res->set(res->period, period->clone()); - if (periodic_offset) - res->set(res->periodic_offset, periodic_offset->clone()); + if (offset) + res->set(res->offset, offset->clone()); if (spread) res->set(res->spread, spread->clone()); if (settings) @@ -32,20 +30,20 @@ void ASTRefreshStrategy::formatImpl( frame.need_parens = false; f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << "REFRESH " << (f_settings.hilite ? hilite_none : ""); - using enum ScheduleKind; + using enum RefreshScheduleKind; switch (schedule_kind) { case AFTER: f_settings.ostr << "AFTER " << (f_settings.hilite ? hilite_none : ""); - interval->formatImpl(f_settings, state, frame); + period->formatImpl(f_settings, state, frame); break; case EVERY: f_settings.ostr << "EVERY " << (f_settings.hilite ? hilite_none : ""); period->formatImpl(f_settings, state, frame); - if (periodic_offset) + if (offset) { f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << " OFFSET " << (f_settings.hilite ? hilite_none : ""); - periodic_offset->formatImpl(f_settings, state, frame); + offset->formatImpl(f_settings, state, frame); } break; default: diff --git a/src/Parsers/ASTRefreshStrategy.h b/src/Parsers/ASTRefreshStrategy.h index 0df6a6e0e10..ca248b76b40 100644 --- a/src/Parsers/ASTRefreshStrategy.h +++ b/src/Parsers/ASTRefreshStrategy.h @@ -7,24 +7,23 @@ namespace DB { +enum class RefreshScheduleKind : UInt8 +{ + UNKNOWN = 0, + AFTER, + EVERY +}; + /// Strategy for MATERIALIZED VIEW ... REFRESH .. class ASTRefreshStrategy : public IAST { public: - enum class ScheduleKind : UInt8 - { - UNKNOWN = 0, - AFTER, - EVERY - }; - ASTSetQuery * settings = nullptr; ASTExpressionList * dependencies = nullptr; - ASTTimeInterval * interval = nullptr; - ASTTimePeriod * period = nullptr; - ASTTimeInterval * periodic_offset = nullptr; - ASTTimePeriod * spread = nullptr; - ScheduleKind schedule_kind{ScheduleKind::UNKNOWN}; + ASTTimeInterval * period = nullptr; + ASTTimeInterval * offset = nullptr; + ASTTimeInterval * spread = nullptr; + RefreshScheduleKind schedule_kind{RefreshScheduleKind::UNKNOWN}; String getID(char) const override { return "Refresh strategy definition"; } diff --git a/src/Parsers/ASTTimeInterval.cpp b/src/Parsers/ASTTimeInterval.cpp index 13e07f4ed65..44f6e577e94 100644 --- a/src/Parsers/ASTTimeInterval.cpp +++ b/src/Parsers/ASTTimeInterval.cpp @@ -7,18 +7,6 @@ namespace DB { -ASTPtr ASTTimePeriod::clone() const -{ - return std::make_shared(*this); -} - -void ASTTimePeriod::formatImpl(const FormatSettings & f_settings, FormatState &, FormatStateStacked frame) const -{ - frame.need_parens = false; - f_settings.ostr << value << ' '; - f_settings.ostr << (f_settings.hilite ? hilite_keyword : "") << kind.toKeyword() << (f_settings.hilite ? 
hilite_none : ""); -} - ASTPtr ASTTimeInterval::clone() const { return std::make_shared(*this); @@ -28,7 +16,7 @@ void ASTTimeInterval::formatImpl(const FormatSettings & f_settings, FormatState { frame.need_parens = false; - for (bool is_first = true; auto [kind, value] : kinds | std::views::reverse) + for (bool is_first = true; auto [kind, value] : interval.toIntervals()) { if (!std::exchange(is_first, false)) f_settings.ostr << ' '; diff --git a/src/Parsers/ASTTimeInterval.h b/src/Parsers/ASTTimeInterval.h index a8f2518d180..a68acd0f8ea 100644 --- a/src/Parsers/ASTTimeInterval.h +++ b/src/Parsers/ASTTimeInterval.h @@ -2,31 +2,18 @@ #include -#include +#include #include namespace DB { -/// Simple periodic time interval like 10 SECOND -class ASTTimePeriod : public IAST -{ -public: - UInt64 value{0}; - IntervalKind kind{IntervalKind::Second}; - - String getID(char) const override { return "TimePeriod"; } - - ASTPtr clone() const override; - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; -}; - /// Compound time interval like 1 YEAR 3 DAY 15 MINUTE class ASTTimeInterval : public IAST { public: - std::map kinds; + CalendarTimeInterval interval; String getID(char) const override { return "TimeInterval"; } diff --git a/src/Parsers/ParserRefreshStrategy.cpp b/src/Parsers/ParserRefreshStrategy.cpp index 05dd081e61d..af518ee203e 100644 --- a/src/Parsers/ParserRefreshStrategy.cpp +++ b/src/Parsers/ParserRefreshStrategy.cpp @@ -11,41 +11,52 @@ namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + bool ParserRefreshStrategy::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto refresh = std::make_shared(); if (ParserKeyword{"AFTER"}.ignore(pos, expected)) { - refresh->schedule_kind = ASTRefreshStrategy::ScheduleKind::AFTER; - ASTPtr interval; - if (!ParserTimeInterval{}.parse(pos, interval, expected)) + refresh->schedule_kind = RefreshScheduleKind::AFTER; + ASTPtr period; + if (!ParserTimeInterval{}.parse(pos, period, expected)) return false; - refresh->set(refresh->interval, interval); + refresh->set(refresh->period, period); } else if (ParserKeyword{"EVERY"}.ignore(pos, expected)) { - refresh->schedule_kind = ASTRefreshStrategy::ScheduleKind::EVERY; + refresh->schedule_kind = RefreshScheduleKind::EVERY; ASTPtr period; - if (!ParserTimePeriod{}.parse(pos, period, expected)) + if (!ParserTimeInterval{{.allow_mixing_calendar_and_clock_units = false}}.parse(pos, period, expected)) return false; refresh->set(refresh->period, period); if (ParserKeyword{"OFFSET"}.ignore(pos, expected)) { ASTPtr periodic_offset; - if (!ParserTimeInterval{}.parse(pos, periodic_offset, expected)) + if (!ParserTimeInterval{{.allow_zero = true}}.parse(pos, periodic_offset, expected)) return false; - refresh->set(refresh->periodic_offset, periodic_offset); + + if (periodic_offset->as()->interval.maxSeconds() + >= period->as()->interval.minSeconds()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "OFFSET must be less than the period"); + + refresh->set(refresh->offset, periodic_offset); } } - if (refresh->schedule_kind == ASTRefreshStrategy::ScheduleKind::UNKNOWN) + if (refresh->schedule_kind == RefreshScheduleKind::UNKNOWN) return false; if (ParserKeyword{"RANDOMIZE FOR"}.ignore(pos, expected)) { ASTPtr spread; - if (!ParserTimePeriod{}.parse(pos, spread, expected)) + if (!ParserTimeInterval{{.allow_zero = true}}.parse(pos, spread, expected)) return false; refresh->set(refresh->spread, spread); diff --git 
a/src/Parsers/ParserTimeInterval.cpp b/src/Parsers/ParserTimeInterval.cpp index 8168ae61a33..8454eb27e1d 100644 --- a/src/Parsers/ParserTimeInterval.cpp +++ b/src/Parsers/ParserTimeInterval.cpp @@ -14,64 +14,40 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -namespace -{ - -struct ValKind -{ - UInt64 val; - IntervalKind kind; - bool empty; -}; - -std::optional parseValKind(IParser::Pos & pos, Expected & expected) -{ - ASTPtr value; - IntervalKind kind; - if (!ParserNumber{}.parse(pos, value, expected)) - return ValKind{ .empty = true }; - if (!parseIntervalKind(pos, expected, kind)) - return {}; - UInt64 val; - if (!value->as().value.tryGet(val)) - throw Exception(ErrorCodes::SYNTAX_ERROR, "Time interval must be an integer"); - return ValKind{ val, kind, false }; -} - -} - -bool ParserTimePeriod::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto parsed = parseValKind(pos, expected); - - if (!parsed || parsed->empty || parsed->val == 0) - return false; - - auto time_period = std::make_shared(); - time_period->value = parsed->val; - time_period->kind = parsed->kind; - - node = time_period; - return true; -} +ParserTimeInterval::ParserTimeInterval(Options opt) : options(opt) {} +ParserTimeInterval::ParserTimeInterval() = default; bool ParserTimeInterval::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - auto time_interval = std::make_shared(); - - auto parsed = parseValKind(pos, expected); - while (parsed && !parsed->empty) + CalendarTimeInterval::Intervals intervals; + while (true) { - if (parsed->val == 0) + ASTPtr value; + IntervalKind kind; + if (!ParserNumber{}.parse(pos, value, expected)) + break; + if (!parseIntervalKind(pos, expected, kind)) return false; - auto [it, inserted] = time_interval->kinds.emplace(parsed->kind, parsed->val); - if (!inserted) - return false; - parsed = parseValKind(pos, expected); + + UInt64 val; + if (!value->as().value.tryGet(val)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Time interval must be an integer"); + intervals.emplace_back(kind, val); } - if (!parsed || time_interval->kinds.empty()) + if (intervals.empty()) return false; + + CalendarTimeInterval interval(intervals); + + if (!options.allow_zero) + interval.assertPositive(); + if (!options.allow_mixing_calendar_and_clock_units) + interval.assertSingleUnit(); + + auto time_interval = std::make_shared(); + time_interval->interval = interval; + node = time_interval; return true; } diff --git a/src/Parsers/ParserTimeInterval.h b/src/Parsers/ParserTimeInterval.h index 6eae1fa4133..2a6d7fd2534 100644 --- a/src/Parsers/ParserTimeInterval.h +++ b/src/Parsers/ParserTimeInterval.h @@ -5,18 +5,22 @@ namespace DB { -/// Parser for ASTTimePeriod -class ParserTimePeriod : public IParserBase -{ -protected: - const char * getName() const override { return "time period"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - /// Parser for ASTTimeInterval class ParserTimeInterval : public IParserBase { +public: + struct Options + { + bool allow_mixing_calendar_and_clock_units = true; + bool allow_zero = false; + }; + + ParserTimeInterval(); + explicit ParserTimeInterval(Options opt); + protected: + Options options; + const char * getName() const override { return "time interval"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Storages/MaterializedView/RefreshSchedule.cpp b/src/Storages/MaterializedView/RefreshSchedule.cpp new file mode 100644 index 00000000000..fab6877848c --- /dev/null +++ 
b/src/Storages/MaterializedView/RefreshSchedule.cpp
@@ -0,0 +1,57 @@
+#include
+
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+RefreshSchedule::RefreshSchedule(const ASTRefreshStrategy & strategy)
+{
+    kind = strategy.schedule_kind;
+    period = strategy.period->interval;
+    if (strategy.offset)
+        offset = strategy.offset->interval;
+    if (strategy.spread)
+        spread = strategy.spread->interval;
+}
+
+static std::chrono::sys_seconds advanceEvery(std::chrono::system_clock::time_point prev, CalendarTimeInterval period, CalendarTimeInterval offset)
+{
+    auto period_start = period.floor(prev);
+    auto t = offset.advance(period_start);
+    if (t > prev)
+        return t;
+    t = offset.advance(period.advance(period_start));
+    chassert(t > prev);
+    return t;
+}
+
+std::chrono::sys_seconds RefreshSchedule::prescribeNext(
+    std::chrono::system_clock::time_point last_prescribed, std::chrono::system_clock::time_point now) const
+{
+    if (kind == RefreshScheduleKind::AFTER)
+        return period.advance(now);
+
+    /// It's important to use prescribed instead of actual time here, otherwise we would do multiple
+    /// refreshes instead of one if the generated spread is negative and the refresh completes
+    /// faster than the spread.
+    auto res = advanceEvery(last_prescribed, period, offset);
+    if (res < now)
+        res = advanceEvery(now, period, offset); // fell behind by a whole period, skip to current time
+
+    return res;
+}
+
+std::chrono::system_clock::time_point RefreshSchedule::addRandomSpread(std::chrono::sys_seconds prescribed_time) const
+{
+    Int64 ms = Int64(spread.minSeconds() * 1000 / 2);
+    auto add = std::uniform_int_distribution(-ms, ms)(thread_local_rng);
+    return prescribed_time + std::chrono::milliseconds(add);
+}
+
+}
diff --git a/src/Storages/MaterializedView/RefreshSchedule.h b/src/Storages/MaterializedView/RefreshSchedule.h
new file mode 100644
index 00000000000..2a6a5afc3ad
--- /dev/null
+++ b/src/Storages/MaterializedView/RefreshSchedule.h
@@ -0,0 +1,28 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class ASTRefreshStrategy;
+
+struct RefreshSchedule
+{
+    RefreshScheduleKind kind;
+    CalendarTimeInterval period;
+    CalendarTimeInterval offset;
+    CalendarTimeInterval spread;
+
+    explicit RefreshSchedule(const ASTRefreshStrategy & strategy);
+
+    /// Tells when to do the next refresh (without random spread).
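A short worked trace of advanceEvery() above, assuming a view with REFRESH EVERY 1 DAY OFFSET 2 HOUR and UTC timestamps (the values are illustrative):

    prev         = 2023-12-06 03:00:00            (last prescribed refresh)
    period_start = period.floor(prev)           = 2023-12-06 00:00:00
    t            = offset.advance(period_start) = 2023-12-06 02:00:00   (not > prev, so try the next period)
    t            = offset.advance(period.advance(period_start))
                 = offset.advance(2023-12-07 00:00:00)
                 = 2023-12-07 02:00:00           (returned as the next prescribed time)

The chassert(t > prev) holds because the parser (see ParserRefreshStrategy above) rejects an OFFSET whose maximum duration is not strictly less than the period's minimum duration.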
+ std::chrono::sys_seconds prescribeNext( + std::chrono::system_clock::time_point last_prescribed, std::chrono::system_clock::time_point now) const; + + std::chrono::system_clock::time_point addRandomSpread(std::chrono::sys_seconds prescribed_time) const; +}; + +} diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index 000ee7aa1bd..710d48efdb8 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -17,24 +17,10 @@ namespace CurrentMetrics namespace DB { -namespace -{ - -std::uniform_int_distribution makeSpreadDistribution(const ASTTimePeriod * spread) -{ - if (!spread) - return std::uniform_int_distribution(0, 0); - Int64 limit = spread->kind.toAvgSeconds() * spread->value / 2; - return std::uniform_int_distribution(-limit, limit); -} - -} - RefreshTask::RefreshTask( const ASTRefreshStrategy & strategy) : log(&Poco::Logger::get("RefreshTask")) - , refresh_timer(strategy) - , refresh_spread{makeSpreadDistribution(strategy.spread)} + , refresh_schedule(strategy) {} RefreshTaskHolder RefreshTask::create( @@ -66,7 +52,7 @@ void RefreshTask::initializeAndStart(std::shared_ptr vi view_to_refresh = view; /// TODO: Add a setting to stop views on startup, set `stop_requested = true` in that case. populateDependencies(); - calculateNextRefreshTime(std::chrono::system_clock::now()); + advanceNextRefreshTime(std::chrono::system_clock::now()); refresh_task->schedule(); } @@ -139,7 +125,7 @@ void RefreshTask::shutdown() set_entry.reset(); } -void RefreshTask::notify(const StorageID & parent_id, std::chrono::system_clock::time_point scheduled_time_without_spread, const RefreshTimer & parent_timer) +void RefreshTask::notify(const StorageID & parent_id, std::chrono::sys_seconds prescribed_time, const RefreshSchedule & parent_schedule) { std::lock_guard guard(mutex); if (!set_entry) @@ -177,7 +163,7 @@ void RefreshTask::notify(const StorageID & parent_id, std::chrono::system_clock: /// Only accept the dependency's refresh if its next refresh time is after ours. /// This takes care of cases (1)-(4), and seems harmless in all other cases. /// Might be mildly helpful in weird cases like REFRESH AFTER 3 HOUR depends on REFRESH AFTER 2 HOUR. - if (parent_timer.next(scheduled_time_without_spread) <= next_refresh_without_spread) + if (parent_schedule.prescribeNext(prescribed_time, std::chrono::system_clock::now()) <= next_refresh_prescribed) return; if (arriveDependency(parent_id) && !std::exchange(refresh_immediately, true)) @@ -185,11 +171,13 @@ void RefreshTask::notify(const StorageID & parent_id, std::chrono::system_clock: /// Decrease delay in case (5). /// Maybe we should do it for all AFTER-AFTER dependencies, even if periods are different. - if (refresh_timer == parent_timer && refresh_timer.tryGetAfter()) + if (refresh_schedule.kind == RefreshScheduleKind::AFTER && + parent_schedule.kind == RefreshScheduleKind::AFTER && + refresh_schedule.period == parent_schedule.period) { - /// TODO: Implement this: + /// TODO: Implement this. /// * Add setting max_after_delay_adjustment_pct - /// * Decrease both next_refresh_without_spread and next_refresh_with_spread, + /// * Decrease both next_refresh_prescribed and next_refresh_with_spread, /// but only if they haven't already been decreased this way during current period /// * refresh_task->schedule() } @@ -217,11 +205,11 @@ void RefreshTask::refreshTask() if (cancel_requested) { - /// Advance to the next refresh time according to schedule. 
+ /// Move on to the next refresh time according to schedule. /// Otherwise we'd start another refresh immediately after canceling this one. auto now = std::chrono::system_clock::now(); if (now >= next_refresh_with_spread) - calculateNextRefreshTime(std::chrono::system_clock::now()); + advanceNextRefreshTime(now); } } @@ -277,7 +265,7 @@ void RefreshTask::refreshTask() reportState(RefreshState::Running); CurrentMetrics::Increment metric_inc(CurrentMetrics::RefreshingViews); - auto scheduled_time_without_spread = next_refresh_without_spread; + auto prescribed_time = next_refresh_prescribed; lock.unlock(); @@ -291,7 +279,7 @@ void RefreshTask::refreshTask() finished = executeRefresh(); if (finished) - completeRefresh(view, LastTaskResult::Finished, scheduled_time_without_spread); + completeRefresh(view, LastTaskResult::Finished, prescribed_time); } catch (...) { @@ -311,7 +299,7 @@ void RefreshTask::refreshTask() { auto now = std::chrono::system_clock::now(); reportLastRefreshTime(now); - calculateNextRefreshTime(now); + advanceNextRefreshTime(now); } } } @@ -351,7 +339,7 @@ bool RefreshTask::executeRefresh() return !not_finished; } -void RefreshTask::completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::system_clock::time_point scheduled_time_without_spread) +void RefreshTask::completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::sys_seconds prescribed_time) { auto stale_table = view->exchangeTargetTable(refresh_query->table_id); @@ -359,7 +347,7 @@ void RefreshTask::completeRefresh(std::shared_ptr view, StorageID my_id = set_entry->getID(); auto dependents = context->getRefreshSet().getDependents(my_id); for (const RefreshTaskHolder & dep_task : dependents) - dep_task->notify(my_id, scheduled_time_without_spread, refresh_timer); + dep_task->notify(my_id, prescribed_time, refresh_schedule); auto drop_context = Context::createCopy(context); InterpreterDropQuery::executeDropQuery(ASTDropQuery::Kind::Drop, drop_context, drop_context, stale_table, /*sync=*/true); @@ -396,28 +384,24 @@ void RefreshTask::cleanState() refresh_query.reset(); } -void RefreshTask::calculateNextRefreshTime(std::chrono::system_clock::time_point now) +void RefreshTask::advanceNextRefreshTime(std::chrono::system_clock::time_point now) { /// TODO: Add a setting to randomize initial delay in case of AFTER, for the case when the server /// is restarted more often than the refresh period. /// TODO: Maybe do something like skip_update_after_seconds and skip_update_after_ratio. - /// Unclear if that's useful at all if the last refresh timestamp is not remembered across restarts. + /// Or maybe that should be checked in refreshTask(), just before starting a refresh. + /// Probably only useful after we have concurrency limits. Or maybe it's not useful even then? - /// It's important to use time without spread here, otherwise we would do multiple refreshes instead - /// of one, if the generated spread is negative and the first refresh completes faster than the spread. 
-    std::chrono::sys_seconds next = refresh_timer.next(next_refresh_without_spread);
-    if (next < now)
-        next = refresh_timer.next(now); // fell behind, skip to current time
-
-    next_refresh_without_spread = next;
-    next_refresh_with_spread = next + std::chrono::seconds{refresh_spread(thread_local_rng)};
+    std::chrono::sys_seconds next = refresh_schedule.prescribeNext(next_refresh_prescribed, now);
+    next_refresh_prescribed = next;
+    next_refresh_with_spread = refresh_schedule.addRandomSpread(next);
 
     reportNextRefreshTime(next_refresh_with_spread);
 }
 
-bool RefreshTask::arriveDependency(const StorageID & parent_table_or_timer)
+bool RefreshTask::arriveDependency(const StorageID & parent)
 {
-    remaining_dependencies.erase(parent_table_or_timer);
+    remaining_dependencies.erase(parent);
     if (!remaining_dependencies.empty() || !time_arrived)
         return false;
     populateDependencies();
diff --git a/src/Storages/MaterializedView/RefreshTask.h b/src/Storages/MaterializedView/RefreshTask.h
index cdb0d22342e..342fe3fc514 100644
--- a/src/Storages/MaterializedView/RefreshTask.h
+++ b/src/Storages/MaterializedView/RefreshTask.h
@@ -2,7 +2,7 @@
 #include
 #include
-#include
+#include
 #include
 
@@ -72,16 +72,14 @@ public:
     void shutdown();
 
     /// Notify dependent task
-    void notify(const StorageID & parent_id, std::chrono::system_clock::time_point scheduled_time_without_spread, const RefreshTimer & parent_timer);
+    void notify(const StorageID & parent_id, std::chrono::sys_seconds prescribed_time, const RefreshSchedule & parent_schedule);
 
 private:
     Poco::Logger * log = nullptr;
     std::weak_ptr view_to_refresh;
     RefreshSet::Entry set_entry;
 
-    /// Refresh schedule
-    RefreshTimer refresh_timer;
-    std::uniform_int_distribution refresh_spread;
+    RefreshSchedule refresh_schedule;
 
     /// Task execution. Non-empty iff a refresh is in progress (possibly paused).
     /// Whoever unsets these should also call storeLastState().
@@ -112,7 +110,20 @@ private:
     std::atomic_bool interrupt_execution {false};
 
     /// When to refresh next. Updated when a refresh is finished or canceled.
-    std::chrono::system_clock::time_point next_refresh_without_spread;
+    /// We maintain the distinction between:
+    ///  * The "prescribed" time of the refresh, dictated by the refresh schedule.
+    ///    E.g. for REFRESH EVERY 1 DAY, the prescribed time is always at the exact start of a day.
+    ///  * Actual wall clock timestamps, e.g. when the refresh is scheduled to happen
+    ///    (including random spread) or when a refresh completed.
+    /// The prescribed time is required for:
+    ///  * Doing REFRESH EVERY correctly if the random spread came up negative, and a refresh completed
+    ///    before the prescribed time. E.g. suppose a refresh was prescribed at 05:00, which was randomly
+    ///    adjusted to 04:50, and the refresh completed at 04:55; we shouldn't schedule another refresh
+    ///    at 05:00, so we should remember that the 04:50-04:55 refresh actually had prescribed time 05:00.
+    ///  * Similarly, for dependencies between REFRESH EVERY tables, using actual time would be unreliable.
+    ///    E.g. for REFRESH EVERY 1 DAY, yesterday's refresh of the dependency shouldn't trigger today's
+    ///    refresh of the dependent even if it happened today (e.g. it was slow or had random spread > 1 day).
+    std::chrono::sys_seconds next_refresh_prescribed;
     std::chrono::system_clock::time_point next_refresh_with_spread;
 
     /// Calls refreshTask() from background thread.
@@ -132,15 +143,15 @@ private:
     /// Methods that do the actual work: creating/dropping internal table, executing the query.
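Condensed, the acceptance test used by notify() above can be read as the following predicate (the wrapper function and its packaging are illustrative; prescribeNext() is the real API from this patch):

    #include <chrono>
    #include <Storages/MaterializedView/RefreshSchedule.h>

    /// Consume the parent's completed refresh only if the parent will not fire
    /// again before our own next prescribed refresh; otherwise a later parent
    /// refresh is the one we should wait for, and reacting now would make the
    /// dependent refresh too often.
    bool acceptParentRefresh(
        const DB::RefreshSchedule & parent_schedule,
        std::chrono::sys_seconds parent_prescribed_time,
        std::chrono::sys_seconds our_next_prescribed)
    {
        auto now = std::chrono::system_clock::now();
        return parent_schedule.prescribeNext(parent_prescribed_time, now) > our_next_prescribed;
    }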
void initializeRefresh(std::shared_ptr view); bool executeRefresh(); - void completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::system_clock::time_point scheduled_time_without_spread); + void completeRefresh(std::shared_ptr view, LastTaskResult result, std::chrono::sys_seconds prescribed_time); void cancelRefresh(LastTaskResult result); void cleanState(); /// Assigns next_refresh_* - void calculateNextRefreshTime(std::chrono::system_clock::time_point now); + void advanceNextRefreshTime(std::chrono::system_clock::time_point now); /// Returns true if all dependencies are fulfilled now. Refills remaining_dependencies in this case. - bool arriveDependency(const StorageID & parent_table_or_timer); + bool arriveDependency(const StorageID & parent); bool arriveTime(); void populateDependencies(); diff --git a/src/Storages/MaterializedView/RefreshTimers.cpp b/src/Storages/MaterializedView/RefreshTimers.cpp deleted file mode 100644 index 0331bad82c3..00000000000 --- a/src/Storages/MaterializedView/RefreshTimers.cpp +++ /dev/null @@ -1,302 +0,0 @@ -#include - -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -namespace -{ - constexpr std::chrono::days ZERO_DAYS{0}; - constexpr std::chrono::days ONE_DAY{1}; -} - -RefreshAfterTimer::RefreshAfterTimer(const ASTTimeInterval * time_interval) -{ - if (time_interval) - { - for (auto && [kind, value] : time_interval->kinds) - setWithKind(kind, value); - } -} - -std::chrono::sys_seconds RefreshAfterTimer::after(std::chrono::system_clock::time_point tp) const -{ - auto tp_date = std::chrono::floor(tp); - auto tp_time_offset = std::chrono::floor(tp - tp_date); - std::chrono::year_month_day ymd(tp_date); - ymd += years; - ymd += months; - std::chrono::sys_days date = ymd; - date += weeks; - date += days; - auto result = std::chrono::time_point_cast(date); - result += tp_time_offset; - result += hours; - result += minutes; - result += seconds; - return result; -} - -void RefreshAfterTimer::setWithKind(IntervalKind kind, UInt64 val) -{ - switch (kind) - { - case IntervalKind::Second: - seconds = std::chrono::seconds{val}; - break; - case IntervalKind::Minute: - minutes = std::chrono::minutes{val}; - break; - case IntervalKind::Hour: - hours = std::chrono::hours{val}; - break; - case IntervalKind::Day: - days = std::chrono::days{val}; - break; - case IntervalKind::Week: - weeks = std::chrono::weeks{val}; - break; - case IntervalKind::Month: - months = std::chrono::months{val}; - break; - case IntervalKind::Year: - years = std::chrono::years{val}; - break; - default: - break; - } -} - -bool RefreshAfterTimer::operator==(const RefreshAfterTimer & rhs) const -{ - /// (Or maybe different implementations of standard library have different sizes of chrono types. - /// If so, feel free to just remove this assert.) - static_assert(sizeof(*this) == 40, "RefreshAfterTimer fields appear to have changed. 
Please update this operator==() here."); - return std::tie(seconds, minutes, hours, days, weeks, months, years) == std::tie(rhs.seconds, rhs.minutes, rhs.hours, rhs.days, rhs.weeks, rhs.months, rhs.years); -} - -RefreshEveryTimer::RefreshEveryTimer(const ASTTimePeriod & time_period, const ASTTimeInterval * time_offset) - : offset(time_offset) - , value{static_cast(time_period.value)} - , kind{time_period.kind} -{ - // TODO: validate invariants -} - -std::chrono::sys_seconds RefreshEveryTimer::next(std::chrono::system_clock::time_point tp) const -{ - if (value == 0) - return std::chrono::floor(tp); - switch (kind) - { - case IntervalKind::Second: - return alignedToSeconds(tp); - case IntervalKind::Minute: - return alignedToMinutes(tp); - case IntervalKind::Hour: - return alignedToHours(tp); - case IntervalKind::Day: - return alignedToDays(tp); - case IntervalKind::Week: - return alignedToWeeks(tp); - case IntervalKind::Month: - return alignedToMonths(tp); - case IntervalKind::Year: - return alignedToYears(tp); - default: - return std::chrono::ceil(tp); - } -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToYears(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto tp_days = std::chrono::floor(tp); - std::chrono::year_month_day tp_ymd(tp_days); - auto normalize_years = [](std::chrono::year year) -> std::chrono::sys_days - { - return year / std::chrono::January / 1d; - }; - - auto prev_years = normalize_years(tp_ymd.year()); - if (auto prev_time = offset.after(prev_years); prev_time > tp) - return prev_time; - - auto next_years = normalize_years(std::chrono::year((int(tp_ymd.year()) / value + 1) * value)); - return offset.after(next_years); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToMonths(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto tp_days = std::chrono::floor(tp); - std::chrono::year_month_day tp_ymd(tp_days); - auto normalize_months = [](const std::chrono::year_month_day & ymd, unsigned month_value) -> std::chrono::sys_days - { - return ymd.year() / std::chrono::month{month_value} / 1d; - }; - - auto prev_month_value = static_cast(tp_ymd.month()) / value * value; - auto prev_months = normalize_months(tp_ymd, prev_month_value); - if (auto prev_time = offset.after(prev_months); prev_time > tp) - return prev_time; - - auto next_month_value = (static_cast(tp_ymd.month()) / value + 1) * value; - auto next_months = normalize_months(tp_ymd, next_month_value); - std::chrono::year_month_day next_ymd(next_months); - if (next_ymd.year() > tp_ymd.year()) - return offset.after(normalize_months(next_ymd, value)); - return offset.after(next_months); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToWeeks(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto cpp_weekday = offset.getDays() + ONE_DAY; - std::chrono::weekday offset_weekday((cpp_weekday - std::chrono::floor(cpp_weekday)).count()); - - auto tp_days = std::chrono::floor(tp); - std::chrono::year_month_weekday tp_ymd(tp_days); - auto normalize_weeks = [offset_weekday](const std::chrono::year_month_weekday & ymd, unsigned week_value) - { - return std::chrono::sys_days(ymd.year() / ymd.month() / std::chrono::weekday{offset_weekday}[week_value]); - }; - - auto prev_week_value = tp_ymd.index() / value * value; - auto prev_days = normalize_weeks(tp_ymd, prev_week_value); - if (auto prev_time = offset.after(prev_days - offset.getDays()); prev_time > tp) - return prev_time; 
- - auto next_day_value = (tp_ymd.index() / value + 1) * value; - auto next_days = normalize_weeks(tp_ymd, next_day_value); - std::chrono::year_month_weekday next_ymd(next_days); - if (next_ymd.year() > tp_ymd.year() || next_ymd.month() > tp_ymd.month()) - return offset.after(normalize_weeks(next_ymd, value) - offset.getDays()); - return offset.after(next_days); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToDays(std::chrono::system_clock::time_point tp) const -{ - auto tp_days = std::chrono::floor(tp); - std::chrono::year_month_day tp_ymd(tp_days); - auto normalize_days = [](const std::chrono::year_month_day & ymd, unsigned day_value) -> std::chrono::sys_days - { - return ymd.year() / ymd.month() / std::chrono::day{day_value}; - }; - - auto prev_day_value = static_cast(tp_ymd.day()) / value * value; - auto prev_days = normalize_days(tp_ymd, prev_day_value); - if (auto prev_time = offset.after(prev_days); prev_time > tp) - return prev_time; - - auto next_day_value = (static_cast(tp_ymd.day()) / value + 1) * value; - auto next_days = normalize_days(tp_ymd, next_day_value); - std::chrono::year_month_day next_ymd(next_days); - if (next_ymd.year() > tp_ymd.year() || next_ymd.month() > tp_ymd.month()) - return offset.after(normalize_days(next_ymd, value)); - return offset.after(next_days); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToHours(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto tp_days = std::chrono::floor(tp); - auto tp_hours = std::chrono::floor(tp - tp_days); - - auto prev_hours = (tp_hours / value) * value; - if (auto prev_time = offset.after(tp_days + prev_hours); prev_time > tp) - return prev_time; - - auto next_hours = (tp_hours / value + 1h) * value; - if (std::chrono::floor(next_hours - 1h) > ZERO_DAYS) - return offset.after(tp_days + ONE_DAY + std::chrono::hours{value}); - return offset.after(tp_days + next_hours); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToMinutes(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto tp_hours = std::chrono::floor(tp); - auto tp_minutes = std::chrono::floor(tp - tp_hours); - - auto prev_minutes = (tp_minutes / value) * value; - if (auto prev_time = offset.after(tp_hours + prev_minutes); prev_time > tp) - return prev_time; - - auto next_minutes = (tp_minutes / value + 1min) * value; - if (std::chrono::floor(next_minutes - 1min) > 0h) - return offset.after(tp_hours + 1h + std::chrono::minutes{value}); - return offset.after(tp_hours + next_minutes); -} - -std::chrono::sys_seconds RefreshEveryTimer::alignedToSeconds(std::chrono::system_clock::time_point tp) const -{ - using namespace std::chrono_literals; - - auto tp_minutes = std::chrono::floor(tp); - auto tp_seconds = std::chrono::floor(tp - tp_minutes); - - auto next_seconds = (tp_seconds / value + 1s) * value; - if (std::chrono::floor(next_seconds - 1s) > 0min) - return tp_minutes + 1min + std::chrono::seconds{value}; - return tp_minutes + next_seconds; -} - -bool RefreshEveryTimer::operator==(const RefreshEveryTimer & rhs) const -{ - static_assert(sizeof(*this) == sizeof(offset) + 8, "RefreshEveryTimer fields appear to have changed. 
Please update this operator==() here."); - return std::tie(offset, value, kind) == std::tie(rhs.offset, rhs.value, rhs.kind); -} - -std::variant makeTimer(const ASTRefreshStrategy & strategy) -{ - using enum ASTRefreshStrategy::ScheduleKind; - switch (strategy.schedule_kind) - { - case EVERY: - return RefreshEveryTimer{*strategy.period, strategy.interval}; - case AFTER: - return RefreshAfterTimer{strategy.interval}; - default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown refresh strategy kind"); - } -} - -RefreshTimer::RefreshTimer(const ASTRefreshStrategy & strategy) : timer(makeTimer(strategy)) {} - -namespace -{ - -template -struct CombinedVisitor : Ts... { using Ts::operator()...; }; -template -CombinedVisitor(Ts...) -> CombinedVisitor; - -} - -std::chrono::sys_seconds RefreshTimer::next(std::chrono::system_clock::time_point tp) const -{ - CombinedVisitor visitor{ - [tp](const RefreshAfterTimer & timer_) { return timer_.after(tp); }, - [tp](const RefreshEveryTimer & timer_) { return timer_.next(tp); }}; - auto r = std::visit(std::move(visitor), timer); - chassert(r > tp); - return r; -} - -bool RefreshTimer::operator==(const RefreshTimer & rhs) const { return timer == rhs.timer; } -const RefreshAfterTimer * RefreshTimer::tryGetAfter() const { return std::get_if(&timer); } -const RefreshEveryTimer * RefreshTimer::tryGetEvery() const { return std::get_if(&timer); } - -} diff --git a/src/Storages/MaterializedView/RefreshTimers.h b/src/Storages/MaterializedView/RefreshTimers.h deleted file mode 100644 index 4625e8cd344..00000000000 --- a/src/Storages/MaterializedView/RefreshTimers.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include - -#include - -namespace DB -{ - -class ASTTimeInterval; -class ASTTimePeriod; -class ASTRefreshStrategy; - -/// Schedule timer for MATERIALIZED VIEW ... REFRESH AFTER ... queries -class RefreshAfterTimer -{ -public: - explicit RefreshAfterTimer(const ASTTimeInterval * time_interval); - - std::chrono::sys_seconds after(std::chrono::system_clock::time_point tp) const; - - std::chrono::seconds getSeconds() const { return seconds; } - std::chrono::minutes getMinutes() const { return minutes; } - std::chrono::hours getHours() const { return hours; } - std::chrono::days getDays() const { return days; } - std::chrono::weeks getWeeks() const { return weeks; } - std::chrono::months getMonths() const { return months; } - std::chrono::years getYears() const { return years; } - - bool operator==(const RefreshAfterTimer & rhs) const; - -private: - void setWithKind(IntervalKind kind, UInt64 val); - - std::chrono::seconds seconds{0}; - std::chrono::minutes minutes{0}; - std::chrono::hours hours{0}; - std::chrono::days days{0}; - std::chrono::weeks weeks{0}; - std::chrono::months months{0}; - std::chrono::years years{0}; -}; - -/// Schedule timer for MATERIALIZED VIEW ... REFRESH EVERY ... 
queries -class RefreshEveryTimer -{ -public: - explicit RefreshEveryTimer(const ASTTimePeriod & time_period, const ASTTimeInterval * time_offset); - - std::chrono::sys_seconds next(std::chrono::system_clock::time_point tp) const; - - bool operator==(const RefreshEveryTimer & rhs) const; - -private: - std::chrono::sys_seconds alignedToYears(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToMonths(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToWeeks(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToDays(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToHours(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToMinutes(std::chrono::system_clock::time_point tp) const; - - std::chrono::sys_seconds alignedToSeconds(std::chrono::system_clock::time_point tp) const; - - RefreshAfterTimer offset; - UInt32 value{0}; - IntervalKind kind{IntervalKind::Second}; -}; - -struct RefreshTimer -{ - std::variant timer; - - explicit RefreshTimer(const ASTRefreshStrategy & strategy); - - std::chrono::sys_seconds next(std::chrono::system_clock::time_point tp) const; - - bool operator==(const RefreshTimer & rhs) const; - - const RefreshAfterTimer * tryGetAfter() const; - const RefreshEveryTimer * tryGetEvery() const; -}; - -} From ef4cc5ec7fe5b690acfaa453f117cc50800e4cc2 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 29 Nov 2023 02:32:41 +0000 Subject: [PATCH 66/88] Things --- src/Access/Common/AccessType.h | 3 +- src/Common/CalendarTimeInterval.cpp | 4 +- src/Common/CalendarTimeInterval.h | 4 +- src/Core/BackgroundSchedulePool.cpp | 4 +- src/Core/BackgroundSchedulePool.h | 6 +- src/Core/Settings.h | 1 + src/Interpreters/AddDefaultDatabaseVisitor.h | 5 +- src/Interpreters/InterpreterAlterQuery.cpp | 5 + src/Interpreters/InterpreterCreateQuery.cpp | 8 +- src/Interpreters/InterpreterSystemQuery.cpp | 4 + src/Parsers/ASTAlterQuery.cpp | 6 + src/Parsers/ASTAlterQuery.h | 4 + src/Parsers/ASTSystemQuery.h | 5 + src/Parsers/ParserAlterQuery.cpp | 9 + src/Parsers/ParserSystemQuery.cpp | 24 ++ src/Storages/AlterCommands.cpp | 14 +- src/Storages/AlterCommands.h | 4 + .../MaterializedView/RefreshSchedule.cpp | 6 + .../MaterializedView/RefreshSchedule.h | 1 + src/Storages/MaterializedView/RefreshSet.cpp | 148 ++++++------ src/Storages/MaterializedView/RefreshSet.h | 110 ++++----- src/Storages/MaterializedView/RefreshTask.cpp | 219 ++++++++++++------ src/Storages/MaterializedView/RefreshTask.h | 68 +++--- src/Storages/MergeTree/MergeTreeData.cpp | 3 + src/Storages/SelectQueryDescription.cpp | 13 +- src/Storages/SelectQueryDescription.h | 4 +- src/Storages/StorageInMemoryMetadata.cpp | 7 + src/Storages/StorageInMemoryMetadata.h | 6 + src/Storages/StorageMaterializedView.cpp | 26 ++- .../System/StorageSystemViewRefreshes.cpp | 54 +++-- .../02661_refreshable_materialized_views.sql | 25 ++ 31 files changed, 490 insertions(+), 310 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 0188cbb5b99..307dc8aa1cc 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -82,7 +82,8 @@ enum class AccessType \ M(ALTER_VIEW_REFRESH, "ALTER LIVE VIEW REFRESH, REFRESH VIEW", VIEW, ALTER_VIEW) \ M(ALTER_VIEW_MODIFY_QUERY, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \ - M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY; + 
M(ALTER_VIEW_MODIFY_REFRESH, "ALTER TABLE MODIFY QUERY", VIEW, ALTER_VIEW) \ + M(ALTER_VIEW, "", GROUP, ALTER) /* allows to execute ALTER VIEW REFRESH, ALTER VIEW MODIFY QUERY, ALTER VIEW MODIFY REFRESH; implicitly enabled by the grant ALTER_TABLE */\ \ M(ALTER, "", GROUP, ALL) /* allows to execute ALTER {TABLE|LIVE VIEW} */\ diff --git a/src/Common/CalendarTimeInterval.cpp b/src/Common/CalendarTimeInterval.cpp index bcedf63b3ff..de1ef76f8ab 100644 --- a/src/Common/CalendarTimeInterval.cpp +++ b/src/Common/CalendarTimeInterval.cpp @@ -64,12 +64,12 @@ CalendarTimeInterval::Intervals CalendarTimeInterval::toIntervals() const UInt64 CalendarTimeInterval::minSeconds() const { - return 3600*24 * (365 * months/12 + 28 * months%12) + seconds; + return 3600*24 * (months/12 * 365 + months%12 * 28) + seconds; } UInt64 CalendarTimeInterval::maxSeconds() const { - return 3600*24 * (366 * months/12 + 31 * months%12) + seconds; + return 3600*24 * (months/12 * 366 + months%12 * 31) + seconds; } void CalendarTimeInterval::assertSingleUnit() const diff --git a/src/Common/CalendarTimeInterval.h b/src/Common/CalendarTimeInterval.h index 40a390736bc..c68449de0b6 100644 --- a/src/Common/CalendarTimeInterval.h +++ b/src/Common/CalendarTimeInterval.h @@ -40,8 +40,6 @@ struct CalendarTimeInterval /// Add this interval to the timestamp. First months, then seconds. /// Gets weird near month boundaries: October 31 + 1 month = December 1. - /// Gets weird with leap years: 2004-03-15 + 1 year = 2005-03-16, - /// 2004-12-31 + 1 year = 2006-01-01, std::chrono::sys_seconds advance(std::chrono::system_clock::time_point t) const; /// Rounds the timestamp down to the nearest timestamp "aligned" with this interval. @@ -51,7 +49,7 @@ struct CalendarTimeInterval /// E.g. if the interval is 1 month, rounds down to the start of the month. /// * For seconds, rounds to a timestamp x such that (x - December 29 1969 (Monday)) is divisible /// by this interval. - /// E.g. if the interval is 1 week, rounds down to the start of the week. + /// E.g. if the interval is 1 week, rounds down to the start of the week (Monday). /// /// Guarantees: /// * advance(floor(x)) > x diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp index ec1ae047d05..fa892bc3c84 100644 --- a/src/Core/BackgroundSchedulePool.cpp +++ b/src/Core/BackgroundSchedulePool.cpp @@ -31,7 +31,7 @@ bool BackgroundSchedulePoolTaskInfo::schedule() return true; } -bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool overwrite) +bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool overwrite, bool only_if_scheduled) { std::lock_guard lock(schedule_mutex); @@ -39,6 +39,8 @@ bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t milliseconds, bool ove return false; if (delayed && !overwrite) return false; + if (!delayed && only_if_scheduled) + return false; pool.scheduleDelayedTask(shared_from_this(), milliseconds, lock); return true; diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h index e97b02e976f..eca93353283 100644 --- a/src/Core/BackgroundSchedulePool.h +++ b/src/Core/BackgroundSchedulePool.h @@ -106,8 +106,10 @@ public: bool schedule(); /// Schedule for execution after specified delay. - /// If overwrite is set then the task will be re-scheduled (if it was already scheduled, i.e. delayed == true). 
- bool scheduleAfter(size_t milliseconds, bool overwrite = true); + /// If overwrite is set, and the task is already scheduled with a delay (delayed == true), + /// the task will be re-scheduled with the new delay. + /// If only_if_scheduled is set, don't do anything unless the task is already scheduled with a delay. + bool scheduleAfter(size_t milliseconds, bool overwrite = true, bool only_if_scheduled = false); /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. void deactivate(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d96b1b9fc10..7b84cfa6f5d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -584,6 +584,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ + M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \ M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d96b1b9fc10..7b84cfa6f5d 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -584,6 +584,7 @@ class IColumn;
     M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
     M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
     M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
+    M(Bool, stop_refreshable_materialized_views_on_startup, false, "On server startup, prevent scheduling of refreshable materialized views, as if with SYSTEM STOP VIEWS. You can manually start them with SYSTEM START VIEWS or SYSTEM START VIEW afterwards. Also applies to newly created views. Has no effect on non-refreshable materialized views.", 0) \
     M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \
     M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \
     M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \
diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h
index e6354467938..b977a73d461 100644
--- a/src/Interpreters/AddDefaultDatabaseVisitor.h
+++ b/src/Interpreters/AddDefaultDatabaseVisitor.h
@@ -238,8 +238,9 @@ private:
 
     void visit(ASTRefreshStrategy & refresh, ASTPtr &) const
     {
-        for (auto & table : refresh.children)
-            tryVisit<ASTTableIdentifier>(table);
+        if (refresh.dependencies)
+            for (auto & table : refresh.dependencies->children)
+                tryVisit<ASTTableIdentifier>(table);
     }
 
     void visitChildren(IAST & ast) const
diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp
index db93467c0a4..2a34932d950 100644
--- a/src/Interpreters/InterpreterAlterQuery.cpp
+++ b/src/Interpreters/InterpreterAlterQuery.cpp
@@ -460,6 +460,11 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS
             required_access.emplace_back(AccessType::ALTER_VIEW_MODIFY_QUERY, database, table);
             break;
         }
+        case ASTAlterCommand::MODIFY_REFRESH:
+        {
+            required_access.emplace_back(AccessType::ALTER_VIEW_MODIFY_REFRESH, database, table);
+            break;
+        }
         case ASTAlterCommand::LIVE_VIEW_REFRESH:
         {
             required_access.emplace_back(AccessType::ALTER_VIEW_REFRESH, database, table);
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 801a46f4167..823f04e0580 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -1089,6 +1089,11 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data
                             "{} UUID specified, but engine of database {} is not Atomic", kind, create.getDatabase());
     }
 
+    if (create.refresh_strategy && !internal)
+        throw Exception(ErrorCodes::INCORRECT_QUERY,
+            "Refreshable materialized view requires Atomic database engine");
+        /// ... because it needs to atomically replace the inner table after refresh
+
     /// The database doesn't support UUID so we'll ignore it. The UUID could be set here because of either
     /// a) the initiator of `ON CLUSTER` query generated it to ensure the same UUIDs are used on different hosts; or
     /// b) `RESTORE from backup` query generated it to ensure the same UUIDs are used on different hosts.
@@ -1212,11 +1217,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
 
     if (create.refresh_strategy)
     {
-        /// TODO: This doesn't work for some reason.
         AddDefaultDatabaseVisitor visitor(getContext(), current_database);
         visitor.visit(*create.refresh_strategy);
-
-        /// TODO: For DEPENDS ON, check that the specified tables exist.
     }
 
     if (create.columns_list)
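The AddDefaultDatabaseVisitor change above resolves the removed TODO: only the table identifiers listed under DEPENDS ON are qualified with the current database, rather than every child of the refresh strategy AST. A rough standalone sketch of that qualification, where qualify() and all names are illustrative rather than the real visitor API:

    #include <cassert>
    #include <string>
    #include <vector>

    /// Prepend the current database to a table name that is not qualified yet,
    /// mirroring what the visitor does for each DEPENDS ON entry.
    std::string qualify(const std::string & name, const std::string & current_database)
    {
        return name.find('.') == std::string::npos ? current_database + "." + name : name;
    }

    int main()
    {
        /// DEPENDS ON source, other_db.source2 -- created while USE db1 is in effect.
        std::vector<std::string> depends_on{"source", "other_db.source2"};
        for (auto & table : depends_on)
            table = qualify(table, "db1");

        assert(depends_on[0] == "db1.source");        /// unqualified name picks up db1
        assert(depends_on[1] == "other_db.source2");  /// already-qualified name is untouched
    }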
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index 7df6499fd39..6998a6ef978 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -629,6 +629,9 @@ BlockIO InterpreterSystemQuery::execute()
         case Type::RESUME_VIEW:
             getRefreshTask()->resume();
             break;
+        case Type::TEST_VIEW:
+            getRefreshTask()->setFakeTime(query.fake_time_for_view);
+            break;
         case Type::DROP_REPLICA:
             dropReplica(query);
             break;
@@ -1284,6 +1287,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
         case Type::CANCEL_VIEW:
         case Type::PAUSE_VIEW:
         case Type::RESUME_VIEW:
+        case Type::TEST_VIEW:
        {
             if (!query.table)
                 required_access.emplace_back(AccessType::SYSTEM_VIEWS);
diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp
index ed9de6a46eb..84355817b2c 100644
--- a/src/Parsers/ASTAlterQuery.cpp
+++ b/src/Parsers/ASTAlterQuery.cpp
@@ -453,6 +453,12 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState &
                       << (settings.hilite ? hilite_none : "");
         select->formatImpl(settings, state, frame);
     }
+    else if (type == ASTAlterCommand::MODIFY_REFRESH)
+    {
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY REFRESH " << settings.nl_or_ws
+                      << (settings.hilite ? hilite_none : "");
+        refresh->formatImpl(settings, state, frame);
+    }
     else if (type == ASTAlterCommand::LIVE_VIEW_REFRESH)
     {
         settings.ostr << (settings.hilite ? hilite_keyword : "") << "REFRESH " << (settings.hilite ? hilite_none : "");
diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h
index 77c540aed33..0b115537a6d 100644
--- a/src/Parsers/ASTAlterQuery.h
+++ b/src/Parsers/ASTAlterQuery.h
@@ -40,6 +40,7 @@ public:
         MODIFY_SETTING,
         RESET_SETTING,
         MODIFY_QUERY,
+        MODIFY_REFRESH,
 
         REMOVE_TTL,
         REMOVE_SAMPLE_BY,
@@ -166,6 +167,9 @@ public:
      */
     ASTPtr values;
 
+    /// For MODIFY REFRESH
+    ASTPtr refresh;
+
     bool detach = false;    /// true for DETACH PARTITION
     bool part = false;      /// true for ATTACH PART, DROP DETACHED PART and MOVE
diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h
index ec8e47f9513..fa849800ba6 100644
--- a/src/Parsers/ASTSystemQuery.h
+++ b/src/Parsers/ASTSystemQuery.h
@@ -98,6 +98,7 @@ public:
         CANCEL_VIEW,
         PAUSE_VIEW,
         RESUME_VIEW,
+        TEST_VIEW,
         END
     };
@@ -141,6 +142,10 @@ public:
 
     ServerType server_type;
 
+    /// For SYSTEM TEST VIEW (SET FAKE TIME