From 59153e865d4ffeda3c67cbdd945e14fdc860e446 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Tue, 19 Dec 2023 09:53:04 +0000 Subject: [PATCH 001/145] materialize column not to override past values Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 23 +++++++-- .../0_stateless/02008_materialize_column.sql | 1 + ..._column_not_override_past_values.reference | 29 +++++++++++ ...ialize_column_not_override_past_values.sql | 49 +++++++++++++++++++ 4 files changed, 97 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference create mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 827749aa094..a04d9cdb886 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -65,6 +65,7 @@ static void splitAndModifyMutationCommands( Poco::Logger * log) { auto part_columns = part->getColumnsDescription(); + const auto & table_columns = metadata_snapshot->getColumns(); if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { @@ -73,9 +74,16 @@ static void splitAndModifyMutationCommands( for (const auto & command : commands) { + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default expression, materialize column should not override past values + /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + mutated_columns.emplace(command.column_name); + } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN || command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL || command.type == MutationCommand::Type::DELETE @@ -85,8 +93,6 @@ static void splitAndModifyMutationCommands( for (const auto & [column_name, expr] : command.column_to_update_expression) mutated_columns.emplace(column_name); - if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) - mutated_columns.emplace(command.column_name); } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION @@ -196,8 +202,15 @@ static void splitAndModifyMutationCommands( { for (const auto & command : commands) { - if (command.type == MutationCommand::Type::MATERIALIZE_INDEX - || command.type == MutationCommand::Type::MATERIALIZE_COLUMN + if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) + { + /// For ordinary column with default expression, materialize column should not override past values + /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); + if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + for_interpreter.push_back(command); + } + else if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC || 
command.type == MutationCommand::Type::MATERIALIZE_PROJECTION || command.type == MutationCommand::Type::MATERIALIZE_TTL diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index a78920d2525..cc7d3096402 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -17,6 +17,7 @@ ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+2); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; +ALTER TABLE tmp CLEAR COLUMN s; -- Need to clear because MATERIALIZE COLUMN won't override past values; ALTER TABLE tmp MATERIALIZE COLUMN s; ALTER TABLE tmp MODIFY COLUMN s String DEFAULT toString(x+3); SELECT arraySort(groupArray(x)), groupArray(s) FROM tmp; diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference new file mode 100644 index 00000000000..6b0d88bd09b --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference @@ -0,0 +1,29 @@ +--Origin-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 +--Origin-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 +--Origin-- +1 2 +2 \N +3 54321 +--After materialize-- +1 2 +2 \N +3 54321 +--Origin-- +1 2 +2 54321 +--After rename-- +1 2 +2 54321 +--After materialize-- +1 2 +2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql new file mode 100644 index 00000000000..1815661e097 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql @@ -0,0 +1,49 @@ + +SET mutations_sync = 2; +-- Compact parts +CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Wide parts +CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Nullable column != physically absent +CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id, foo ) values ( 2, NULL ); +INSERT INTO test ( id ) values ( 3 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN foo; +SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; + +-- Parts with renamed column +CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO test ( id, foo ) values ( 1, 2 ); +INSERT INTO test ( id ) values ( 2 ); +SELECT '--Origin--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test RENAME COLUMN foo TO bar; +SELECT '--After rename--'; +SELECT * FROM test ORDER BY id; +ALTER TABLE test MATERIALIZE COLUMN bar; 
+SELECT '--After materialize--'; +SELECT * FROM test ORDER BY id; +DROP TABLE test; \ No newline at end of file From e832599dfab7ba2304a4a00175ce48f6a63ed701 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 20 Dec 2023 04:57:56 +0000 Subject: [PATCH 002/145] fix materialize column for compact parts Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a04d9cdb886..dd84aa0d98a 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -80,7 +80,11 @@ static void splitAndModifyMutationCommands( /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) + { + LOG_DEBUG(log, "Materializing column {}\n", command.column_name); + for_interpreter.push_back(command); mutated_columns.emplace(command.column_name); + } } if (command.type == MutationCommand::Type::MATERIALIZE_INDEX || command.type == MutationCommand::Type::MATERIALIZE_STATISTIC @@ -92,7 +96,6 @@ static void splitAndModifyMutationCommands( for_interpreter.push_back(command); for (const auto & [column_name, expr] : command.column_to_update_expression) mutated_columns.emplace(column_name); - } else if (command.type == MutationCommand::Type::DROP_INDEX || command.type == MutationCommand::Type::DROP_PROJECTION From 7b49a0e530e2a2cb8629c249b96f43c6554ea51d Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 20 Dec 2023 04:59:03 +0000 Subject: [PATCH 003/145] remove junk log Signed-off-by: Duc Canh Le --- src/Storages/MergeTree/MutateTask.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index dd84aa0d98a..bb41608eb00 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -81,7 +81,6 @@ static void splitAndModifyMutationCommands( auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) { - LOG_DEBUG(log, "Materializing column {}\n", command.column_name); for_interpreter.push_back(command); mutated_columns.emplace(command.column_name); } From c30736d415fcdaccb68a1c0e37e8c4de9242e014 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 12 Jan 2024 15:31:15 +0000 Subject: [PATCH 004/145] Cosmetics --- src/Storages/MergeTree/MutateTask.cpp | 8 +-- ...mn_must_not_override_past_values.reference | 33 ++++++++++++ ...e_column_must_not_override_past_values.sql | 53 +++++++++++++++++++ ..._column_not_override_past_values.reference | 29 ---------- ...ialize_column_not_override_past_values.sql | 49 ----------------- 5 files changed, 90 insertions(+), 82 deletions(-) create mode 100644 tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference create mode 100644 tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql delete mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference delete mode 100644 tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql diff --git 
a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index bb41608eb00..25fa45e7b68 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -76,8 +76,8 @@ static void splitAndModifyMutationCommands( { if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) { - /// For ordinary column with default expression, materialize column should not override past values - /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) { @@ -206,8 +206,8 @@ static void splitAndModifyMutationCommands( { if (command.type == MutationCommand::Type::MATERIALIZE_COLUMN) { - /// For ordinary column with default expression, materialize column should not override past values - /// So we only mutated column if `command.column_name` is a materialized column or if the part does not have physical column file + /// For ordinary column with default or materialized expression, MATERIALIZE COLUMN should not override past values + /// So we only mutate column if `command.column_name` is a default/materialized column or if the part does not have physical column file auto column_ordinary = table_columns.getOrdinary().tryGetByName(command.column_name); if (!column_ordinary || !part->tryGetColumn(command.column_name) || !part->hasColumnFiles(*column_ordinary)) for_interpreter.push_back(command); diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference new file mode 100644 index 00000000000..a5a0370620b --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference @@ -0,0 +1,33 @@ +-- Compact parts +Origin +1 2 +2 54321 +After materialize +1 2 +2 54321 +-- Wide parts +Origin +1 2 +2 54321 +After materialize +1 2 +2 54321 +-- Nullable column != physically absent +Origin +1 2 +2 \N +3 54321 +After materialize +1 2 +2 \N +3 54321 +-- Parts with renamed column +Origin +1 2 +2 54321 +After rename +1 2 +2 54321 +After materialize +1 2 +2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql new file mode 100644 index 00000000000..825c7eab048 --- /dev/null +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql @@ -0,0 +1,53 @@ +SET mutations_sync = 2; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Wide parts'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 
2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Nullable column != physically absent'; + +CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (2, NULL); +INSERT INTO tab (id) VALUES (3); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN dflt; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Parts with renamed column'; + +CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id) VALUES (2); +SELECT 'Origin'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab RENAME COLUMN dflt TO dflt2; +SELECT 'After rename'; +SELECT * FROM tab ORDER BY id; +ALTER TABLE tab MATERIALIZE COLUMN bar; +SELECT 'After materialize'; +SELECT * FROM tab ORDER BY id; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference deleted file mode 100644 index 6b0d88bd09b..00000000000 --- a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.reference +++ /dev/null @@ -1,29 +0,0 @@ ---Origin-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 ---Origin-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 ---Origin-- -1 2 -2 \N -3 54321 ---After materialize-- -1 2 -2 \N -3 54321 ---Origin-- -1 2 -2 54321 ---After rename-- -1 2 -2 54321 ---After materialize-- -1 2 -2 54321 diff --git a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql deleted file mode 100644 index 1815661e097..00000000000 --- a/tests/queries/0_stateless/02946_materialize_column_not_override_past_values.sql +++ /dev/null @@ -1,49 +0,0 @@ - -SET mutations_sync = 2; --- Compact parts -CREATE TABLE test (id Int64, foo Int64 default 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Wide parts -CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Nullable column != physically absent -CREATE TABLE test (id Int64, foo Nullable(Int64) default 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id, foo ) values ( 2, NULL ); -INSERT INTO test ( id ) values ( 3 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN foo; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; - --- Parts with renamed column -CREATE TABLE test (id Int64, foo Int64 
default 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO test ( id, foo ) values ( 1, 2 ); -INSERT INTO test ( id ) values ( 2 ); -SELECT '--Origin--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test RENAME COLUMN foo TO bar; -SELECT '--After rename--'; -SELECT * FROM test ORDER BY id; -ALTER TABLE test MATERIALIZE COLUMN bar; -SELECT '--After materialize--'; -SELECT * FROM test ORDER BY id; -DROP TABLE test; \ No newline at end of file From 799b8d6356e68c4544791f42a72d71bed38322c5 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 21 Jan 2024 19:00:40 +0000 Subject: [PATCH 005/145] support resource request canceling --- docs/en/operations/system-tables/scheduler.md | 4 + src/Common/Scheduler/ISchedulerNode.h | 2 + src/Common/Scheduler/ISchedulerQueue.h | 6 ++ src/Common/Scheduler/Nodes/FairPolicy.h | 98 ++++++++++--------- src/Common/Scheduler/Nodes/FifoQueue.h | 23 ++++- src/Common/Scheduler/Nodes/PriorityPolicy.h | 37 ++++--- .../gtest_resource_manager_hierarchical.cpp | 1 - .../Nodes/tests/gtest_resource_scheduler.cpp | 63 ++++++++++++ src/Common/Scheduler/ResourceGuard.h | 9 +- src/Common/Scheduler/ResourceRequest.cpp | 13 +++ src/Common/Scheduler/ResourceRequest.h | 30 +++--- src/Common/Scheduler/SchedulerRoot.h | 32 +++--- .../System/StorageSystemScheduler.cpp | 4 + 13 files changed, 218 insertions(+), 104 deletions(-) create mode 100644 src/Common/Scheduler/ResourceRequest.cpp diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index 953db4c28f2..c4de7f76fdc 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,7 +26,9 @@ priority: 0 is_active: 0 active_children: 0 dequeued_requests: 67 +canceled_requests: 0 dequeued_cost: 4692272 +canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -54,7 +56,9 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. +- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. 
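For reference, the effect of the two new introspection columns documented above can be observed directly on a running server. The query below is an illustrative sketch rather than part of the patch; it assumes at least one resource is configured so that `system.scheduler` has rows, and it simply lists the counters that this patch extends:

SELECT
    resource,
    path,
    type,
    dequeued_requests,
    canceled_requests,
    dequeued_cost,
    canceled_cost,
    busy_periods
FROM system.scheduler
ORDER BY resource, path
FORMAT Vertical;

For a `fifo` queue node, `canceled_requests` and `canceled_cost` grow only when a cancel succeeds while the request is still queued; requests that were already dequeued stay accounted under `dequeued_requests` and `dequeued_cost` as before.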
diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 804026d7bf4..20c1f4332da 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,7 +387,9 @@ public: /// Introspection std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index cbe63bd304a..532f4bf6c63 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// Cancel previously enqueued request. + /// Returns `false` and does nothing given unknown or already executed request. + /// Returns `true` if requests has been found and canceled. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. + virtual bool cancelRequest(ResourceRequest * request) = 0; + /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index c0e187e6fa9..53740e7a543 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,56 +134,64 @@ public: std::pair dequeueRequest() override { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - - if (child_active) // Put active child back in heap after vruntime update + while (true) { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; + if (heap_size == 0) + return {nullptr, false}; - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; - // Reset any difference between children on busy period end - if (heap_size == 0) - { - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 1000000000 < ns) + if (request) { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } + // SFQ fairness invariant: system vruntime equals last served request start-time + assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; - dequeued_requests++; - 
dequeued_cost += request->cost; - return {request, heap_size > 0}; + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + } + + if (child_active) // Put active child back in heap after vruntime update + { + std::push_heap(items.begin(), items.begin() + heap_size); + } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = clock_gettime_ns(); + if (last_reset_ns + 1000000000 < ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 38ae902bc2f..2adb7241314 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -40,7 +40,6 @@ public: void enqueueRequest(ResourceRequest * request) override { std::unique_lock lock(mutex); - request->enqueue_ns = clock_gettime_ns(); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -63,6 +62,26 @@ public: return {result, !requests.empty()}; } + bool cancelRequest(ResourceRequest * request) override + { + std::unique_lock lock(mutex); + // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) + for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + { + if (*i == request) + { + requests.erase(i); + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; + } + } + return false; + } + bool isActive() override { std::unique_lock lock(mutex); @@ -105,7 +124,7 @@ public: private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; + std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 6d6b15bd063..fd02ea3df62 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,25 +102,30 @@ public: std::pair dequeueRequest() override { - if (items.empty()) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - - // Deactivate child if it is empty - if (!child_active) + while (true) { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); if (items.empty()) - busy_periods++; - } + return {nullptr, false}; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + + // 
Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + if (items.empty()) + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp index 961a3b6f713..cdf09776077 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_manager_hierarchical.cpp @@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); - gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index 9fefbc02cbd..e76639a4b01 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,6 +4,7 @@ #include +#include #include using namespace DB; @@ -73,6 +74,22 @@ struct ResourceHolder } }; +struct MyRequest : public ResourceRequest +{ + std::function on_execute; + + explicit MyRequest(ResourceCost cost_, std::function on_execute_) + : ResourceRequest(cost_) + , on_execute(on_execute_) + {} + + void execute() override + { + if (on_execute) + on_execute(); + } +}; + TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } + +TEST(SchedulerRoot, Cancel) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + std::barrier sync(2); + std::thread consumer1([&] + { + std::barrier destruct_sync(2); + MyRequest request(1,[&] + { + sync.arrive_and_wait(); // (A) + EXPECT_TRUE(fc1->requests.contains(&request)); + sync.arrive_and_wait(); // (B) + request.finish(); + destruct_sync.arrive_and_wait(); // (C) + }); + a.queue->enqueueRequest(&request); + destruct_sync.arrive_and_wait(); // (C) + }); + + std::thread consumer2([&] + { + MyRequest request(1,[&] + { + FAIL() << "This request must be canceled, but instead executes"; + }); + sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately + b.queue->enqueueRequest(&request); + bool canceled = b.queue->cancelRequest(&request); + EXPECT_TRUE(canceled); + sync.arrive_and_wait(); // (B) release request of consumer1 to be finished + }); + + consumer1.join(); + consumer2.join(); + + EXPECT_TRUE(fc1->requests.empty()); +} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index dca4041b176..50f665a384b 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,8 +71,7 @@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; - if (constraint) - constraint->finishRequest(this); + ResourceRequest::finish(); } static Request & local() @@ -126,12 +125,6 @@ public: } } - /// Mark request as unsuccessful; by default request is considered to be 
successful - void setFailure() - { - request.successful = false; - } - ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp new file mode 100644 index 00000000000..26e8084cdfa --- /dev/null +++ b/src/Common/Scheduler/ResourceRequest.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void ResourceRequest::finish() +{ + if (constraint) + constraint->finishRequest(this); +} + +} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index 3d2230746f9..f3153ad382c 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,9 +14,6 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); -/// Timestamps (nanoseconds since epoch) -using ResourceNs = UInt64; - /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -31,7 +28,7 @@ using ResourceNs = UInt64; * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) request->constraint->finishRequest() is called when consumption is finished. + * 6) ResourceRequest::finish() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -39,7 +36,10 @@ using ResourceNs = UInt64; * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request cancelling is not supported yet. + * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). + * Returning false means it is too late for request to be canceled. It should be processed in a regular way. + * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen + * and step (6) MUST be omitted. */ class ResourceRequest { @@ -48,32 +48,20 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; - /// Request outcome - /// Should be filled during resource consumption - bool successful; - /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; - /// Timestamps for introspection - ResourceNs enqueue_ns; - ResourceNs execute_ns; - ResourceNs finish_ns; - explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } + /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; - successful = true; constraint = nullptr; - enqueue_ns = 0; - execute_ns = 0; - finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -83,6 +71,12 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; + + /// Stop resource consumption and notify resource scheduler. + /// Should be called when resource consumption is finished by consumer. + /// ResourceRequest should not be destructed or reset before calling to `finish()`. + /// WARNING: this function MUST not be called if request was canceled. 
+ void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 3a23a8df834..ab3f702a422 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,22 +145,27 @@ public: std::pair dequeueRequest() override { - if (current == nullptr) // No active resources - return {nullptr, false}; + while (true) + { + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); - assert(request != nullptr); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; + if (request == nullptr) // Possible in case of request cancel, just retry + continue; + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; + } } bool isActive() override @@ -245,7 +250,6 @@ private: void execute(ResourceRequest * request) { - request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index ba07d44dbf9..633bac5d285 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,7 +30,9 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. 
Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -93,7 +95,9 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; From df0c018a9be06e9ccbfb40460f29b155aa86b57f Mon Sep 17 00:00:00 2001 From: Hongbin Ma Date: Fri, 12 Jan 2024 16:09:09 +0800 Subject: [PATCH 006/145] support T64 for date32 type --- src/Compression/CompressionCodecT64.cpp | 6 +++++ .../00873_t64_codec_date.reference | 4 +++ .../0_stateless/00873_t64_codec_date.sql | 26 +++++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 tests/queries/0_stateless/00873_t64_codec_date.reference create mode 100644 tests/queries/0_stateless/00873_t64_codec_date.sql diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index bf9a9414bc1..42c6a18aa77 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -91,6 +91,7 @@ enum class MagicNumber : uint8_t Decimal32 = 19, Decimal64 = 20, IPv4 = 21, + Date32 = 22, }; MagicNumber serializeTypeId(std::optional type_id) @@ -109,6 +110,7 @@ MagicNumber serializeTypeId(std::optional type_id) case TypeIndex::Int32: return MagicNumber::Int32; case TypeIndex::Int64: return MagicNumber::Int64; case TypeIndex::Date: return MagicNumber::Date; + case TypeIndex::Date32: return MagicNumber::Date32; case TypeIndex::DateTime: return MagicNumber::DateTime; case TypeIndex::DateTime64: return MagicNumber::DateTime64; case TypeIndex::Enum8: return MagicNumber::Enum8; @@ -137,6 +139,7 @@ TypeIndex deserializeTypeId(uint8_t serialized_type_id) case MagicNumber::Int32: return TypeIndex::Int32; case MagicNumber::Int64: return TypeIndex::Int64; case MagicNumber::Date: return TypeIndex::Date; + case MagicNumber::Date32: return TypeIndex::Date32; case MagicNumber::DateTime: return TypeIndex::DateTime; case MagicNumber::DateTime64: return TypeIndex::DateTime64; case MagicNumber::Enum8: return TypeIndex::Enum8; @@ -177,6 +180,8 @@ TypeIndex baseType(TypeIndex type_idx) case TypeIndex::Enum16: case TypeIndex::Date: return TypeIndex::UInt16; + case TypeIndex::Date32: + return TypeIndex::Int32; case TypeIndex::UInt32: case TypeIndex::DateTime: case TypeIndex::IPv4: @@ -205,6 +210,7 @@ TypeIndex typeIdx(const IDataType * data_type) case TypeIndex::UInt16: case TypeIndex::Enum16: case TypeIndex::Date: + case TypeIndex::Date32: case TypeIndex::Int32: case TypeIndex::UInt32: case TypeIndex::IPv4: diff --git a/tests/queries/0_stateless/00873_t64_codec_date.reference b/tests/queries/0_stateless/00873_t64_codec_date.reference new file mode 100644 index 00000000000..1568c3122e6 --- /dev/null +++ b/tests/queries/0_stateless/00873_t64_codec_date.reference @@ -0,0 +1,4 @@ +1970-01-01 1970-01-01 1950-01-01 1950-01-01 +1970-01-01 1970-01-01 1970-01-01 1970-01-01 +2149-06-06 2149-06-06 2149-06-08 2149-06-08 +2149-06-06 2149-06-06 2149-06-06 2149-06-06 diff --git a/tests/queries/0_stateless/00873_t64_codec_date.sql b/tests/queries/0_stateless/00873_t64_codec_date.sql new file mode 100644 index 00000000000..e9230c75665 --- /dev/null +++ 
b/tests/queries/0_stateless/00873_t64_codec_date.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t64; + +CREATE TABLE t64 +( + date16 Date, + t_date16 Date Codec(T64, ZSTD), + date_32 Date32, + t_date32 Date32 Codec(T64, ZSTD) +) ENGINE MergeTree() ORDER BY tuple(); + +INSERT INTO t64 values ('1970-01-01', '1970-01-01', '1970-01-01', '1970-01-01'); +INSERT INTO t64 values ('2149-06-06', '2149-06-06', '2149-06-06', '2149-06-06'); +INSERT INTO t64 values ('2149-06-08', '2149-06-08', '2149-06-08', '2149-06-08'); +INSERT INTO t64 values ('1950-01-01', '1950-01-01', '1950-01-01', '1950-01-01'); + +SELECT * FROM t64 ORDER BY date16; + +SELECT * FROM t64 WHERE date16 != t_date16; +SELECT * FROM t64 WHERE date_32 != t_date32; + +OPTIMIZE TABLE t64 FINAL; + +SELECT * FROM t64 WHERE date16 != t_date16; +SELECT * FROM t64 WHERE date_32 != t_date32; + +DROP TABLE t64; From 8c7218bac2fa09356750e23e79ed686c879665b6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 22 Jan 2024 09:40:19 +0000 Subject: [PATCH 007/145] Store latest logs inmemory --- src/Coordination/Changelog.cpp | 393 +++++++++++++++++++++++++-------- src/Coordination/Changelog.h | 48 +++- 2 files changed, 341 insertions(+), 100 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 7f1135eec94..c06a8bad91a 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -49,9 +50,15 @@ void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr descrip } disk_from->copyFile(from_path, *disk_to, path_to, {}); disk_to->removeFile(tmp_changelog_name); + + /// a different thread could be trying to read from the file + /// we should make sure the source disk contains the file while read is in progress + { + std::lock_guard file_lock(description->file_mutex); + description->disk = disk_to; + } disk_from->removeFile(description->path); description->path = path_to; - description->disk = disk_to; } constexpr auto DEFAULT_PREFIX = "changelog"; @@ -111,9 +118,11 @@ class ChangelogWriter public: ChangelogWriter( std::map & existing_changelogs_, + LogEntryStorage & entry_storage_, KeeperContextPtr keeper_context_, LogFileSettings log_file_settings_) : existing_changelogs(existing_changelogs_) + , entry_storage(entry_storage_) , log_file_settings(log_file_settings_) , keeper_context(std::move(keeper_context_)) , log(&Poco::Logger::get("Changelog")) @@ -238,6 +247,7 @@ public: } auto & write_buffer = getBuffer(); + auto current_position = write_buffer.count(); writeIntBinary(computeRecordChecksum(record), write_buffer); writeIntBinary(record.header.version, write_buffer); @@ -255,6 +265,11 @@ public: /// Flush compressed data to file buffer compressed_buffer->next(); } + else + { + unflushed_indices_with_log_location.emplace_back( + record.header.index, LogLocation{.file_description = current_file_description, .position = current_position}); + } last_index_written = record.header.index; @@ -272,6 +287,8 @@ public: else file_buffer->next(); } + entry_storage.addLogLocations(std::move(unflushed_indices_with_log_location)); + unflushed_indices_with_log_location.clear(); } uint64_t getStartIndex() const @@ -314,9 +331,9 @@ public: private: void finalizeCurrentFile() { - assert(prealloc_done); + chassert(prealloc_done); - assert(current_file_description); + chassert(current_file_description); // compact can delete the file and we don't need to do anything if (current_file_description->deleted) { @@ -400,9 +417,11 @@ 
private: { const auto * file_buffer = tryGetFileBuffer(); + if (file_buffer) + initial_file_size = getSizeFromFileDescriptor(file_buffer->getFD()); + if (log_file_settings.max_size == 0 || !file_buffer) { - initial_file_size = 0; prealloc_done = true; return; } @@ -428,7 +447,6 @@ private: } } #endif - initial_file_size = getSizeFromFileDescriptor(file_buffer->getFD()); prealloc_done = true; } @@ -441,6 +459,10 @@ private: std::map & existing_changelogs; + LogEntryStorage & entry_storage; + + std::vector> unflushed_indices_with_log_location; + ChangelogFileDescriptionPtr current_file_description{nullptr}; std::unique_ptr file_buf; std::optional last_index_written; @@ -482,69 +504,88 @@ struct ChangelogReadResult bool error; }; +namespace +{ + +ChangelogRecord readChangelogRecord(ReadBuffer & read_buf, const std::string & filepath) +{ + /// Read checksum + Checksum record_checksum; + readIntBinary(record_checksum, read_buf); + + /// Read header + ChangelogRecord record; + readIntBinary(record.header.version, read_buf); + readIntBinary(record.header.index, read_buf); + readIntBinary(record.header.term, read_buf); + readIntBinary(record.header.value_type, read_buf); + readIntBinary(record.header.blob_size, read_buf); + + if (record.header.version > CURRENT_CHANGELOG_VERSION) + throw Exception( + ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", static_cast(record.header.version), filepath); + + /// Read data + if (record.header.blob_size != 0) + { + auto buffer = nuraft::buffer::alloc(record.header.blob_size); + auto * buffer_begin = reinterpret_cast(buffer->data_begin()); + read_buf.readStrict(buffer_begin, record.header.blob_size); + record.blob = buffer; + } + else + record.blob = nullptr; + + /// Compare checksums + Checksum checksum = computeRecordChecksum(record); + if (checksum != record_checksum) + { + throw Exception( + ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksums doesn't match for log {} (version {}), index {}, blob_size {}", + filepath, + record.header.version, + record.header.index, + record.header.blob_size); + } + + return record; +} + +LogEntryPtr logEntryFromRecord(const ChangelogRecord & record) +{ + return nuraft::cs_new(record.header.term, record.blob, static_cast(record.header.value_type)); +} + +} + class ChangelogReader { public: - explicit ChangelogReader(DiskPtr disk_, const std::string & filepath_) : disk(disk_), filepath(filepath_) + explicit ChangelogReader(ChangelogFileDescriptionPtr changelog_description_) : changelog_description(changelog_description_) { - compression_method = chooseCompressionMethod(filepath, ""); - auto read_buffer_from_file = disk->readFile(filepath); + compression_method = chooseCompressionMethod(changelog_description->path, ""); + auto read_buffer_from_file = changelog_description->disk->readFile(changelog_description->path); read_buf = wrapReadBufferWithCompressionMethod(std::move(read_buffer_from_file), compression_method); } /// start_log_index -- all entries with index < start_log_index will be skipped, but accounted into total_entries_read_from_log - ChangelogReadResult readChangelog(IndexToLogEntry & logs, uint64_t start_log_index, Poco::Logger * log) + ChangelogReadResult readChangelog(LogEntryStorage & entry_storage, uint64_t start_log_index, Poco::Logger * log) { ChangelogReadResult result{}; result.compressed_log = compression_method != CompressionMethod::None; + const auto & filepath = changelog_description->path; try { while (!read_buf->eof()) { result.last_position = read_buf->count(); - /// 
Read checksum - Checksum record_checksum; - readIntBinary(record_checksum, *read_buf); - /// Read header - ChangelogRecord record; - readIntBinary(record.header.version, *read_buf); - readIntBinary(record.header.index, *read_buf); - readIntBinary(record.header.term, *read_buf); - readIntBinary(record.header.value_type, *read_buf); - readIntBinary(record.header.blob_size, *read_buf); - - if (record.header.version > CURRENT_CHANGELOG_VERSION) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported changelog version {} on path {}", static_cast(record.header.version), filepath); - - /// Read data - if (record.header.blob_size != 0) - { - auto buffer = nuraft::buffer::alloc(record.header.blob_size); - auto * buffer_begin = reinterpret_cast(buffer->data_begin()); - read_buf->readStrict(buffer_begin, record.header.blob_size); - record.blob = buffer; - } - else - record.blob = nullptr; - - /// Compare checksums - Checksum checksum = computeRecordChecksum(record); - if (checksum != record_checksum) - { - throw Exception( - ErrorCodes::CHECKSUM_DOESNT_MATCH, - "Checksums doesn't match for log {} (version {}), index {}, blob_size {}", - filepath, - record.header.version, - record.header.index, - record.header.blob_size); - } + auto record = readChangelogRecord(*read_buf, filepath); /// Check for duplicated changelog ids - if (logs.contains(record.header.index)) - std::erase_if(logs, [&record](const auto & item) { return item.first >= record.header.index; }); + if (entry_storage.contains(record.header.index)) + entry_storage.eraseIf([&record](const auto index) { return index >= record.header.index; }); result.total_entries_read_from_log += 1; @@ -553,12 +594,15 @@ public: continue; /// Create log entry for read data - auto log_entry = nuraft::cs_new(record.header.term, record.blob, static_cast(record.header.value_type)); + auto log_entry = logEntryFromRecord(record); if (result.first_read_index == 0) result.first_read_index = record.header.index; /// Put it into in memory structure - logs.emplace(record.header.index, log_entry); + entry_storage.addEntryWithLocation( + record.header.index, + log_entry, + LogLocation{.file_description = changelog_description, .position = static_cast(result.last_position)}); result.last_read_index = record.header.index; if (result.total_entries_read_from_log % 50000 == 0) @@ -585,12 +629,189 @@ public: } private: - DiskPtr disk; - std::string filepath; + ChangelogFileDescriptionPtr changelog_description; CompressionMethod compression_method; std::unique_ptr read_buf; }; +size_t LogEntryStorage::size() const +{ + return total_entries; +} + +void LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) +{ + logs_cache.insert_or_assign(index, log_entry); + if (logs_cache.size() == 1) + min_index_in_cache = index; + + ++total_entries; +} + +void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location) +{ + logs_cache.emplace(index, log_entry); + logs_location.emplace(index, std::move(log_location)); + if (logs_cache.size() == 1) + min_index_in_cache = index; + else if (logs_cache.size() > 1000) + { + logs_cache.erase(min_index_in_cache); + ++min_index_in_cache; + } +} + +void LogEntryStorage::eraseIf(std::function index_predicate) +{ + std::erase_if(logs_cache, [&](const auto & item) { return index_predicate(item.first); }); +} + +bool LogEntryStorage::contains(uint64_t index) const +{ + return logs_cache.contains(index); +} + +LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const +{ + 
if (index >= min_index_in_cache) + return logs_cache.at(index); + + std::lock_guard lock(logs_location_mutex); + + if (auto it = logs_location.find(index); it != logs_location.end()) + { + const auto & [changelog_description, position] = it->second; + std::lock_guard file_lock(changelog_description->file_mutex); + //std::cout << "Reading from path " << changelog_description->path << std::endl; + auto file = changelog_description->disk->readFile(changelog_description->path); + file->seek(position, SEEK_SET); + + auto record = readChangelogRecord(*file, changelog_description->path); + return logEntryFromRecord(record); + } + else + std::cout << "Nothing found" << std::endl; + + return nullptr; +} + +void LogEntryStorage::clear() +{ + logs_cache.clear(); +} + +LogEntryPtr LogEntryStorage::getLatestConfigChange() const +{ + for (const auto & [_, entry] : logs_cache) + if (entry->get_val_type() == nuraft::conf) + return entry; + return nullptr; +} + +void LogEntryStorage::addLogLocations(std::vector> indices_with_log_locations) +{ + std::lock_guard lock(logs_location_mutex); + unapplied_indices_with_log_locations.insert( + unapplied_indices_with_log_locations.end(), + std::make_move_iterator(indices_with_log_locations.begin()), + std::make_move_iterator(indices_with_log_locations.end())); +} + +void LogEntryStorage::refreshCache() +{ + if (logs_cache.size() <= 1000) + return; + + std::lock_guard lock(logs_location_mutex); + if (logs_location.empty()) + return; + + auto max_index_to_remove = min_index_in_cache + (logs_cache.size() - 1000); + for (auto & [index, log_location] : unapplied_indices_with_log_locations) + { + logs_location.emplace(index, std::move(log_location)); + max_index_with_location = index; + } + + for (size_t index = min_index_in_cache; index < max_index_to_remove; ++index) + { + if (index <= max_index_with_location) + { + logs_cache.erase(index); + min_index_in_cache = index + 1; + } + } + + unapplied_indices_with_log_locations.clear(); +} + +LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end) const +{ + LogEntriesPtr ret = nuraft::cs_new>>(); + ret->reserve(end - start); + + /// we rely on fact that changelogs need to be written sequentially with + /// no other writes between + struct ReadInfo + { + ChangelogFileDescriptionPtr file_description; + size_t start_position = 0; + size_t count = 0; + }; + + /// we have to collect some logs from disks because they are not cached + if (start < min_index_in_cache) + { + //std::cout << "Reading some from disk" << std::endl; + std::lock_guard logs_location_lock(logs_location_mutex); + std::vector read_infos; + for (uint64_t i = start; i < min_index_in_cache && i < end; ++i) + { + const auto & log_location = logs_location.at(i); + const auto push_new_file = [&] + { + read_infos.push_back(ReadInfo + { + .file_description = log_location.file_description, + .start_position = log_location.position, + .count = 1, + }); + }; + + if (read_infos.empty()) + push_new_file(); + else if (auto & last = read_infos.back(); log_location.file_description == last.file_description) + ++last.count; + else + push_new_file(); + } + + for (const auto & [file_description, start_position, count] : read_infos) + { + std::cout << "Reading from path " << file_description->path << " " << count << " entries" << std::endl; + std::lock_guard file_lock(file_description->file_mutex); + auto file = file_description->disk->readFile(file_description->path); + file->seek(start_position, SEEK_SET); + + for (size_t i = 0; i < count; ++i) + { + 
auto record = readChangelogRecord(*file, file_description->path); + ret->push_back(logEntryFromRecord(record)); + } + } + + start = min_index_in_cache; + } + else + std::cout << "Nothing read from disk" << std::endl; + + for (uint64_t i = start; i < end; ++i) + ret->push_back(logs_cache.at(i)); + + return ret; + +} + Changelog::Changelog( Poco::Logger * log_, LogFileSettings log_file_settings, FlushSettings flush_settings_, KeeperContextPtr keeper_context_) : changelogs_detached_dir("detached") @@ -706,7 +927,7 @@ Changelog::Changelog( append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); }); - current_writer = std::make_unique(existing_changelogs, keeper_context, log_file_settings); + current_writer = std::make_unique(existing_changelogs, entry_storage, keeper_context, log_file_settings); } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) @@ -783,8 +1004,8 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin break; } - ChangelogReader reader(changelog_description.disk, changelog_description.path); - last_log_read_result = reader.readChangelog(logs, start_to_read_from, log); + ChangelogReader reader(changelog_description_ptr); + last_log_read_result = reader.readChangelog(entry_storage, start_to_read_from, log); if (last_log_read_result->last_read_index != 0) last_read_index = last_log_read_result->last_read_index; @@ -861,13 +1082,13 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin remove_invalid_logs(); description->disk->removeFile(description->path); existing_changelogs.erase(last_log_read_result->log_start_index); - std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first >= last_log_read_result->log_start_index; }); + entry_storage.eraseIf([last_log_read_result](const auto index) { return index >= last_log_read_result->log_start_index; }); } else if (last_log_read_result->error) { LOG_INFO(log, "Chagelog {} read finished with error but some logs were read from it, file will not be removed", description->path); remove_invalid_logs(); - std::erase_if(logs, [last_log_read_result](const auto & item) { return item.first > last_log_read_result->last_read_index; }); + entry_storage.eraseIf([last_log_read_result](const auto index) { return index > last_log_read_result->last_read_index; }); move_from_latest_logs_disks(existing_changelogs.at(last_log_read_result->log_start_index)); } /// don't mix compressed and uncompressed writes @@ -902,7 +1123,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin moveFileBetweenDisks(description->disk, description, disk, description->path); } - initialized = true; } @@ -1006,14 +1226,14 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index) LOG_WARNING(log, "Removing changelogs that go after broken changelog entry"); removeExistingLogs(start_to_remove_from_itr, existing_changelogs.end()); - std::erase_if(logs, [start_to_remove_from_log_id](const auto & item) { return item.first >= start_to_remove_from_log_id; }); + entry_storage.eraseIf([start_to_remove_from_log_id](const auto index) { return index >= start_to_remove_from_log_id; }); } void Changelog::removeAllLogs() { LOG_WARNING(log, "Removing all changelogs"); removeExistingLogs(existing_changelogs.begin(), existing_changelogs.end()); - logs.clear(); + entry_storage.clear(); } ChangelogRecord Changelog::buildRecord(uint64_t index, const LogEntryPtr & log_entry) @@ -1157,10 +1377,10 
@@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); - if (logs.empty()) + if (min_log_id == 0) min_log_id = index; - logs[index] = log_entry; + entry_storage.addEntry(index, log_entry); max_log_id = index; if (!write_operations.push(AppendLog{index, log_entry})) @@ -1207,7 +1427,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) /// Remove redundant logs from memory /// Everything >= index must be removed - std::erase_if(logs, [index](const auto & item) { return item.first >= index; }); + entry_storage.eraseIf([index](const auto current_index) { return current_index >= index; }); /// Now we can actually override entry at index appendEntry(index, log_entry); @@ -1276,7 +1496,8 @@ void Changelog::compact(uint64_t up_to_log_index) } /// Compaction from the past is possible, so don't make our min_log_id smaller. min_log_id = std::max(min_log_id, up_to_log_index + 1); - std::erase_if(logs, [up_to_log_index](const auto & item) { return item.first <= up_to_log_index; }); + + entry_storage.eraseIf([up_to_log_index](const auto index) { return index <= up_to_log_index; }); if (need_rotate) current_writer->rotate(up_to_log_index + 1); @@ -1289,46 +1510,26 @@ LogEntryPtr Changelog::getLastEntry() const /// This entry treaded in special way by NuRaft static LogEntryPtr fake_entry = nuraft::cs_new(0, nuraft::buffer::alloc(sizeof(uint64_t))); - auto entry = logs.find(max_log_id); - if (entry == logs.end()) - { + auto entry = entry_storage.getEntry(max_log_id); + if (entry == nullptr) return fake_entry; - } - return entry->second; + return entry; } LogEntriesPtr Changelog::getLogEntriesBetween(uint64_t start, uint64_t end) { - LogEntriesPtr ret = nuraft::cs_new>>(); - - ret->resize(end - start); - uint64_t result_pos = 0; - for (uint64_t i = start; i < end; ++i) - { - (*ret)[result_pos] = entryAt(i); - result_pos++; - } - return ret; + return entry_storage.getLogEntriesBetween(start, end); } LogEntryPtr Changelog::entryAt(uint64_t index) { - nuraft::ptr src = nullptr; - auto entry = logs.find(index); - if (entry == logs.end()) - return nullptr; - - src = entry->second; - return src; + return entry_storage.getEntry(index); } LogEntryPtr Changelog::getLatestConfigChange() const { - for (const auto & [_, entry] : logs) - if (entry->get_val_type() == nuraft::conf) - return entry; - return nullptr; + return entry_storage.getLatestConfigChange(); } nuraft::ptr Changelog::serializeEntriesToBuffer(uint64_t index, int32_t count) @@ -1339,11 +1540,11 @@ nuraft::ptr Changelog::serializeEntriesToBuffer(uint64_t index, uint64_t size_total = 0; for (uint64_t i = index; i < index + count; ++i) { - auto entry = logs.find(i); - if (entry == logs.end()) + auto entry = entry_storage.getEntry(i); + if (entry == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Don't have log entry {}", i); - nuraft::ptr buf = entry->second->serialize(); + nuraft::ptr buf = entry->serialize(); size_total += buf->size(); returned_logs.push_back(std::move(buf)); } @@ -1374,7 +1575,7 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) buffer.get(buf_local); LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local); - if (i == 0 && logs.contains(cur_index)) + if (i == 0 && entry_storage.contains(cur_index)) writeAt(cur_index, log_entry); else appendEntry(cur_index, log_entry); @@ -1409,6 +1610,8 @@ std::shared_ptr 
Changelog::flushAsync() LOG_WARNING(log, "Changelog is shut down"); return nullptr; } + + entry_storage.refreshCache(); return failed; } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 20f850e3f62..ee212ef3a71 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -23,7 +22,6 @@ using LogEntries = std::vector; using LogEntriesPtr = nuraft::ptr; using BufferPtr = nuraft::ptr; -using IndexToOffset = std::unordered_map; using IndexToLogEntry = std::unordered_map; enum class ChangelogVersion : uint8_t @@ -63,6 +61,8 @@ struct ChangelogFileDescription DiskPtr disk; std::string path; + std::mutex file_mutex; + bool deleted = false; /// How many entries should be stored in this log @@ -87,6 +87,43 @@ struct FlushSettings uint64_t max_flush_batch_size = 1000; }; +struct LogLocation +{ + ChangelogFileDescriptionPtr file_description; + size_t position; +}; + +struct LogEntryStorage +{ + size_t size() const; + + void addEntry(uint64_t index, const LogEntryPtr & log_entry); + void addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location); + void eraseIf(std::function index_predicate); + bool contains(uint64_t index) const; + LogEntryPtr getEntry(uint64_t index) const; + void clear(); + LogEntryPtr getLatestConfigChange() const; + + using IndexWithLogLocation = std::pair; + + void addLogLocations(std::vector indices_with_log_locations); + + void refreshCache(); + + LogEntriesPtr getLogEntriesBetween(uint64_t start, uint64_t end) const; +private: + /// Mapping log_id -> log_entry + IndexToLogEntry logs_cache; + size_t min_index_in_cache = 0; + + size_t total_entries = 0; + mutable std::mutex logs_location_mutex; + std::vector unapplied_indices_with_log_locations; + std::unordered_map logs_location; + size_t max_index_with_location = 0; +}; + /// Simplest changelog with files rotation. /// No compression, no metadata, just entries with headers one by one. /// Able to read broken files/entries and discard them. Not thread safe. 
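The LogEntryStorage introduced above is a two-tier store: recent entries live in logs_cache, while entries that have already been flushed are remembered only by their position inside a changelog file (logs_location) and can be re-read from disk on demand. A minimal standalone sketch of that lookup idea; Entry, Location and readEntryAt() are placeholders, not the ClickHouse types:

#include <cstdint>
#include <memory>
#include <unordered_map>

/// Standalone sketch only: Entry, Location and readEntryAt() stand in for the
/// nuraft log entry type, LogLocation and the changelog deserialization path.
struct Entry {};
using EntryPtr = std::shared_ptr<Entry>;
struct Location { size_t position = 0; /* plus a handle to the changelog file */ };

class TwoTierLogStorage
{
public:
    EntryPtr get(uint64_t index) const
    {
        /// Fast path: recent entries are still kept in memory.
        if (auto it = cache.find(index); it != cache.end())
            return it->second;
        /// Slow path: the entry was flushed and evicted from the cache;
        /// re-read it from disk using the location recorded at flush time.
        if (auto it = locations.find(index); it != locations.end())
            return readEntryAt(it->second);
        return nullptr; /// unknown index
    }

private:
    static EntryPtr readEntryAt(const Location &)
    {
        /// Stub: the real code would seek to the stored offset and deserialize one record.
        return std::make_shared<Entry>();
    }

    std::unordered_map<uint64_t, EntryPtr> cache;      /// index -> entry still in memory
    std::unordered_map<uint64_t, Location> locations;  /// index -> where to re-read a flushed entry
};

This appears to be the role of addLogLocations()/refreshCache() in the header above: the writer records locations as it flushes, and the storage later applies them under its mutex so that cached entries can eventually be dropped.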
@@ -143,7 +180,7 @@ public: void shutdown(); - uint64_t size() const { return logs.size(); } + uint64_t size() const { return entry_storage.size(); } uint64_t lastDurableIndex() const { @@ -190,8 +227,9 @@ private: std::mutex writer_mutex; /// Current writer for changelog file std::unique_ptr current_writer; - /// Mapping log_id -> log_entry - IndexToLogEntry logs; + + LogEntryStorage entry_storage; + /// Start log_id which exists in all "active" logs /// min_log_id + 1 == max_log_id means empty log storage for NuRaft uint64_t min_log_id = 0; From 36055bd0089f52473f893d71c475a2782a45e8b4 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 25 Jan 2024 21:44:46 +0000 Subject: [PATCH 008/145] init --- src/Functions/FunctionBinaryArithmetic.h | 106 ++++++++++++++---- src/Functions/IsOperation.h | 4 +- .../02975_intdiv_with_decimal.reference | 52 +++++++++ .../0_stateless/02975_intdiv_with_decimal.sql | 54 +++++++++ 4 files changed, 196 insertions(+), 20 deletions(-) create mode 100644 tests/queries/0_stateless/02975_intdiv_with_decimal.reference create mode 100644 tests/queries/0_stateless/02975_intdiv_with_decimal.sql diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 1b2519d1ec5..e34514d15fd 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -146,10 +146,24 @@ private: /// it's not correct for Decimal public: static constexpr bool allow_decimal = IsOperation::allow_decimal; + static constexpr bool only_integer = IsOperation::div_int || IsOperation::div_int_or_zero; /// Appropriate result type for binary operator on numeric types. "Date" can also mean /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). using ResultDataType = Switch< + /// Result must be Integer + Case< + only_integer && IsDataTypeDecimal && IsDataTypeDecimal, + Switch< + Case || std::is_same_v, DataTypeInt256>, + Case || std::is_same_v, DataTypeInt128>, + Case || std::is_same_v, DataTypeInt64>, + Case || std::is_same_v, DataTypeInt32>>>, + Case< + only_integer, + Switch< + Case, LeftDataType>, + Case, RightDataType>>>, /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, Case< @@ -1667,31 +1681,77 @@ public: { if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { - if constexpr (is_division) + if constexpr (is_div_int || is_div_int_or_zero) { - if (context->getSettingsRef().decimal_check_overflow) - { - /// Check overflow by using operands scale (based on big decimal division implementation details): - /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers - /// i.e. 
int_operand = decimal_operand*10^scale - /// For division, left operand will be scaled by right operand scale also to do big integer division, - /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale - /// So, we can check upfront possible overflow just by checking max scale used for left operand - /// Note: it doesn't detect all possible overflow during big decimal division - if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); - } + if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + { + if constexpr (is_division) + { + if (context->getSettingsRef().decimal_check_overflow) + { + /// Check overflow by using operands scale (based on big decimal division implementation details): + /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers + /// i.e. int_operand = decimal_operand*10^scale + /// For division, left operand will be scaled by right operand scale also to do big integer division, + /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale + /// So, we can check upfront possible overflow just by checking max scale used for left operand + /// Note: it doesn't detect all possible overflow during big decimal division + if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); + } + } + ResultDataType result_type = decimalResultType(left, right); + type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } - ResultDataType result_type = decimalResultType(left, right); - type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } else if constexpr ((IsDataTypeDecimal && IsFloatingPoint) || (IsDataTypeDecimal && IsFloatingPoint)) type_res = std::make_shared(); else if constexpr (IsDataTypeDecimal) - type_res = std::make_shared(left.getPrecision(), left.getScale()); + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + type_res = std::make_shared(); + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(left.getPrecision(), left.getScale()); + } else if constexpr (IsDataTypeDecimal) - type_res = std::make_shared(right.getPrecision(), right.getScale()); + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + type_res = std::make_shared(); + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(right.getPrecision(), right.getScale()); + } else if constexpr (std::is_same_v) { // Special case for DateTime: binary OPS should reuse timezone @@ -2009,8 +2069,10 @@ ColumnPtr executeStringInteger(const 
ColumnsWithTypeAndName & arguments, const A constexpr bool decimal_with_float = (IsDataTypeDecimal && IsFloatingPoint) || (IsFloatingPoint && IsDataTypeDecimal); - using T0 = std::conditional_t; - using T1 = std::conditional_t; + constexpr bool is_div_int_with_decimal = (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal); + + using T0 = std::conditional_t>; + using T1 = std::conditional_t>; using ResultType = typename ResultDataType::FieldType; using ColVecT0 = ColumnVectorOrDecimal; using ColVecT1 = ColumnVectorOrDecimal; @@ -2026,6 +2088,12 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A left_col = castColumn(arguments[0], converted_type); right_col = castColumn(arguments[1], converted_type); } + else if constexpr (is_div_int_with_decimal) + { + const auto converted_type = std::make_shared(); + left_col = castColumn(arguments[0], converted_type); + right_col = castColumn(arguments[1], converted_type); + } else { left_col = arguments[0].column; diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index 8ea53c865ce..b36530591ef 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -62,7 +62,9 @@ struct IsOperation static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; - static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; + static constexpr bool division_allow_decimal = div_floating || modulo; + + static constexpr bool allow_decimal = plus || minus || multiply || division_allow_decimal || least || greatest; }; } diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference new file mode 100644 index 00000000000..9c1faab21d7 --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -0,0 +1,52 @@ +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql new file mode 100644 index 00000000000..8fc4b5a9a7d --- /dev/null +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -0,0 +1,54 @@ +--intDiv-- +SELECT intDiv(4,2); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(4, toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 3), 2); +SELECT intDiv(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 4), 2); +SELECT intDiv(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 5), 2); +SELECT intDiv(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDiv(4, toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(4, toDecimal128(2.2, 3)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDiv(4, toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDiv(toDecimal64(4.4, 2), toDecimal256(2.2, 
2)); +SELECT intDiv(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +--intDivOrZero-- +SELECT intDivOrZero(4,2); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(4, toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 3), 2); +SELECT intDivOrZero(toDecimal64(4.4, 3), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 4), 2); +SELECT intDivOrZero(toDecimal128(4.4, 4), toDecimal32(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 5), 2); +SELECT intDivOrZero(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); +SELECT intDivOrZero(4, toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(4, toDecimal128(2.2, 3)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDivOrZero(4, toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); +SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); From ba85642453915dd57c0cba256b35bf8bec390ea5 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 28 Jan 2024 20:26:55 +0000 Subject: [PATCH 009/145] split ISlotControl from ConcurrencyControl --- programs/server/Server.cpp | 2 +- src/Common/ConcurrencyControl.cpp | 28 ++++--- src/Common/ConcurrencyControl.h | 36 ++++----- src/Common/ISlotControl.h | 76 +++++++++++++++++++ .../tests/gtest_concurrency_control.cpp | 28 +++---- src/Processors/Executors/PipelineExecutor.cpp | 14 ++-- src/Processors/Executors/PipelineExecutor.h | 4 +- 7 files changed, 132 insertions(+), 56 deletions(-) create mode 100644 src/Common/ISlotControl.h diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 75ec574c357..d6bee995ca4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1366,7 +1366,7 @@ try global_context->setMaxDatabaseNumToWarn(new_server_settings.max_database_num_to_warn); global_context->setMaxPartNumToWarn(new_server_settings.max_part_num_to_warn); - ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited; + SlotCount concurrent_threads_soft_limit = UnlimitedSlots; if (new_server_settings.concurrent_threads_soft_limit_num > 0 && new_server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit) concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num; if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0) diff --git a/src/Common/ConcurrencyControl.cpp b/src/Common/ConcurrencyControl.cpp index c9fe51550dc..0893cfce955 100644 --- a/src/Common/ConcurrencyControl.cpp +++ b/src/Common/ConcurrencyControl.cpp @@ -12,10 +12,10 @@ namespace ErrorCodes ConcurrencyControl::Slot::~Slot() { - allocation->release(); + static_cast(*allocation).release(); } -ConcurrencyControl::Slot::Slot(AllocationPtr && allocation_) +ConcurrencyControl::Slot::Slot(SlotAllocationPtr && 
allocation_) : allocation(std::move(allocation_)) { } @@ -27,7 +27,7 @@ ConcurrencyControl::Allocation::~Allocation() parent.free(this); } -[[nodiscard]] ConcurrencyControl::SlotPtr ConcurrencyControl::Allocation::tryAcquire() +[[nodiscard]] AcquiredSlotPtr ConcurrencyControl::Allocation::tryAcquire() { SlotCount value = granted.load(); while (value) @@ -35,15 +35,21 @@ ConcurrencyControl::Allocation::~Allocation() if (granted.compare_exchange_strong(value, value - 1)) { std::unique_lock lock{mutex}; - return SlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor + return AcquiredSlotPtr(new Slot(shared_from_this())); // can't use std::make_shared due to private ctor } } return {}; // avoid unnecessary locking } -ConcurrencyControl::SlotCount ConcurrencyControl::Allocation::grantedCount() const +SlotCount ConcurrencyControl::Allocation::grantedCount() const { - return granted; + return granted.load(); +} + +SlotCount ConcurrencyControl::Allocation::allocatedCount() const +{ + std::unique_lock lock{mutex}; + return allocated; } ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCount limit_, SlotCount granted_, Waiters::iterator waiter_) @@ -87,7 +93,7 @@ ConcurrencyControl::~ConcurrencyControl() abort(); } -[[nodiscard]] ConcurrencyControl::AllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max) +[[nodiscard]] SlotAllocationPtr ConcurrencyControl::allocate(SlotCount min, SlotCount max) { if (min > max) throw Exception(ErrorCodes::LOGICAL_ERROR, "ConcurrencyControl: invalid allocation requirements"); @@ -100,13 +106,13 @@ ConcurrencyControl::~ConcurrencyControl() // Create allocation and start waiting if more slots are required if (granted < max) - return AllocationPtr(new Allocation(*this, max, granted, + return SlotAllocationPtr(new Allocation(*this, max, granted, waiters.insert(cur_waiter, nullptr /* pointer is set by Allocation ctor */))); else - return AllocationPtr(new Allocation(*this, max, granted)); + return SlotAllocationPtr(new Allocation(*this, max, granted)); } -void ConcurrencyControl::setMaxConcurrency(ConcurrencyControl::SlotCount value) +void ConcurrencyControl::setMaxConcurrency(SlotCount value) { std::unique_lock lock{mutex}; max_concurrency = std::max(1, value); // never allow max_concurrency to be zero @@ -162,7 +168,7 @@ void ConcurrencyControl::schedule(std::unique_lock &) } } -ConcurrencyControl::SlotCount ConcurrencyControl::available(std::unique_lock &) const +SlotCount ConcurrencyControl::available(std::unique_lock &) const { if (cur_concurrency < max_concurrency) return max_concurrency - cur_concurrency; diff --git a/src/Common/ConcurrencyControl.h b/src/Common/ConcurrencyControl.h index 7e20384aa2a..ba94502962c 100644 --- a/src/Common/ConcurrencyControl.h +++ b/src/Common/ConcurrencyControl.h @@ -7,6 +7,7 @@ #include #include +#include namespace DB { @@ -34,41 +35,35 @@ namespace DB * Oversubscription is possible: total amount of allocated slots can exceed `setMaxConcurrency(limit)` * because `min` amount of slots is allocated for each query unconditionally. 
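 * For illustration (the numbers mirror the Oversubscription test in this patch):
 * with setMaxConcurrency(10) and ten queries each calling allocate(1, 2), the
 * first five are granted 2 slots each and exhaust the limit, while the remaining
 * five are still granted their `min` of 1 slot each, so 15 slots are allocated
 * in total. Slots released later are handed to the waiting allocations in a
 * round-robin manner until each reaches its `max`.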
*/ -class ConcurrencyControl : boost::noncopyable +class ConcurrencyControl : public ISlotControl { public: struct Allocation; - using AllocationPtr = std::shared_ptr; - using SlotCount = UInt64; using Waiters = std::list; - static constexpr SlotCount Unlimited = std::numeric_limits::max(); - // Scoped guard for acquired slot, see Allocation::tryAcquire() - struct Slot : boost::noncopyable + struct Slot : public IAcquiredSlot { - ~Slot(); + ~Slot() override; private: friend struct Allocation; // for ctor - explicit Slot(AllocationPtr && allocation_); + explicit Slot(SlotAllocationPtr && allocation_); - AllocationPtr allocation; + SlotAllocationPtr allocation; }; - // FIXME: have to be unique_ptr, but ThreadFromGlobalPool does not support move semantics yet - using SlotPtr = std::shared_ptr; - // Manages group of slots for a single query, see ConcurrencyControl::allocate(min, max) - struct Allocation : std::enable_shared_from_this, boost::noncopyable + struct Allocation : public ISlotAllocation { - ~Allocation(); + ~Allocation() override; // Take one already granted slot if available. Lock-free iff there is no granted slot. - [[nodiscard]] SlotPtr tryAcquire(); + [[nodiscard]] AcquiredSlotPtr tryAcquire() override; - SlotCount grantedCount() const; + SlotCount grantedCount() const override; + SlotCount allocatedCount() const override; private: friend struct Slot; // for release() @@ -94,7 +89,7 @@ public: ConcurrencyControl & parent; const SlotCount limit; - std::mutex mutex; // the following values must be accessed under this mutex + mutable std::mutex mutex; // the following values must be accessed under this mutex SlotCount allocated; // allocated total (including already `released`) SlotCount released = 0; @@ -103,17 +98,16 @@ public: const Waiters::iterator waiter; // iterator to itself in Waiters list; valid iff allocated < limit }; -public: ConcurrencyControl(); // WARNING: all Allocation objects MUST be destructed before ConcurrencyControl // NOTE: Recommended way to achieve this is to use `instance()` and do graceful shutdown of queries - ~ConcurrencyControl(); + ~ConcurrencyControl() override; // Allocate at least `min` and at most `max` slots. // If not all `max` slots were successfully allocated, a subscription for later allocation is created // Use `Allocation::tryAcquire()` to acquire allocated slot, before running a thread. - [[nodiscard]] AllocationPtr allocate(SlotCount min, SlotCount max); + [[nodiscard]] SlotAllocationPtr allocate(SlotCount min, SlotCount max) override; void setMaxConcurrency(SlotCount value); @@ -134,7 +128,7 @@ private: std::mutex mutex; Waiters waiters; Waiters::iterator cur_waiter; // round-robin pointer - SlotCount max_concurrency = Unlimited; + SlotCount max_concurrency = UnlimitedSlots; SlotCount cur_concurrency = 0; }; diff --git a/src/Common/ISlotControl.h b/src/Common/ISlotControl.h new file mode 100644 index 00000000000..add19f0cc0c --- /dev/null +++ b/src/Common/ISlotControl.h @@ -0,0 +1,76 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +// Interfaces for abstract "slot" allocation and control. +// Slot is a virtual entity existing in a limited amount (CPUs or memory chunks, etc). +// +// Every slot can be in one of the following states: +// * free: slot is available to be allocated. +// * allocated: slot is allocated to a specific ISlotAllocation. +// +// Allocated slots can be considered as: +// * granted: allocated, but not yet acquired. +// * acquired: acquired using IAcquiredSlot. 
+// +// Example for CPU (see ConcurrencyControl.h). Every slot represents one CPU in the system. +// Slot allocation is a request to allocate specific number of CPUs for a specific query. +// Acquired slot is an entity that is held by a thread as long as it is running. This allows +// total number of threads in the system to be limited and the distribution process to be controlled. +// +// TODO: +// - for preemption - ability to return granted slot back and reacquire it later. +// - for memory allocations - variable size of slots (in bytes). + +/// Number of slots +using SlotCount = UInt64; + +/// Unlimited number of slots +constexpr SlotCount UnlimitedSlots = std::numeric_limits::max(); + +/// Acquired slot holder. Slot is considered to be acquired as long the object exists. +class IAcquiredSlot : public std::enable_shared_from_this, boost::noncopyable +{ +public: + virtual ~IAcquiredSlot() = default; +}; + +using AcquiredSlotPtr = std::shared_ptr; + +/// Request for allocation of slots from ISlotControl. +/// Allows for more slots to be acquired and the whole request to be canceled. +class ISlotAllocation : public std::enable_shared_from_this, boost::noncopyable +{ +public: + virtual ~ISlotAllocation() = default; + + /// Take one already granted slot if available. + [[nodiscard]] virtual AcquiredSlotPtr tryAcquire() = 0; + + /// Returns the number of granted slots for given allocation (i.e. available to be acquired) + virtual SlotCount grantedCount() const = 0; + + /// Returns the total number of slots allocated at the moment (acquired and granted) + virtual SlotCount allocatedCount() const = 0; +}; + +using SlotAllocationPtr = std::shared_ptr; + +class ISlotControl : boost::noncopyable +{ +public: + virtual ~ISlotControl() = default; + + // Allocate at least `min` and at most `max` slots. 
+ // If not all `max` slots were successfully allocated, a "subscription" for later allocation is created + [[nodiscard]] virtual SlotAllocationPtr allocate(SlotCount min, SlotCount max) = 0; +}; + +} diff --git a/src/Common/tests/gtest_concurrency_control.cpp b/src/Common/tests/gtest_concurrency_control.cpp index 8e5b89a72a0..5e579317ade 100644 --- a/src/Common/tests/gtest_concurrency_control.cpp +++ b/src/Common/tests/gtest_concurrency_control.cpp @@ -15,7 +15,7 @@ struct ConcurrencyControlTest { ConcurrencyControl cc; - explicit ConcurrencyControlTest(ConcurrencyControl::SlotCount limit = ConcurrencyControl::Unlimited) + explicit ConcurrencyControlTest(SlotCount limit = UnlimitedSlots) { cc.setMaxConcurrency(limit); } @@ -25,7 +25,7 @@ TEST(ConcurrencyControl, Unlimited) { ConcurrencyControlTest t; // unlimited number of slots auto slots = t.cc.allocate(0, 100500); - std::vector acquired; + std::vector acquired; while (auto slot = slots->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 100500); @@ -34,14 +34,14 @@ TEST(ConcurrencyControl, Unlimited) TEST(ConcurrencyControl, Fifo) { ConcurrencyControlTest t(1); // use single slot - std::vector allocations; + std::vector allocations; constexpr int count = 42; allocations.reserve(count); for (int i = 0; i < count; i++) allocations.emplace_back(t.cc.allocate(0, 1)); for (int i = 0; i < count; i++) { - ConcurrencyControl::SlotPtr holder; + AcquiredSlotPtr holder; for (int j = 0; j < count; j++) { auto slot = allocations[j]->tryAcquire(); @@ -60,11 +60,11 @@ TEST(ConcurrencyControl, Fifo) TEST(ConcurrencyControl, Oversubscription) { ConcurrencyControlTest t(10); - std::vector allocations; + std::vector allocations; allocations.reserve(10); for (int i = 0; i < 10; i++) allocations.emplace_back(t.cc.allocate(1, 2)); - std::vector slots; + std::vector slots; // Normal allocation using maximum amount of slots for (int i = 0; i < 5; i++) { @@ -90,7 +90,7 @@ TEST(ConcurrencyControl, ReleaseUnacquiredSlots) { ConcurrencyControlTest t(10); { - std::vector allocations; + std::vector allocations; allocations.reserve(10); for (int i = 0; i < 10; i++) allocations.emplace_back(t.cc.allocate(1, 2)); @@ -98,7 +98,7 @@ TEST(ConcurrencyControl, ReleaseUnacquiredSlots) } // Check that slots were actually released auto allocation = t.cc.allocate(0, 20); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -110,7 +110,7 @@ TEST(ConcurrencyControl, DestroyNotFullyAllocatedAllocation) for (int i = 0; i < 3; i++) { auto allocation = t.cc.allocate(5, 20); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -122,7 +122,7 @@ TEST(ConcurrencyControl, DestroyAllocationBeforeSlots) ConcurrencyControlTest t(10); for (int i = 0; i < 3; i++) { - std::vector acquired; + std::vector acquired; auto allocation = t.cc.allocate(5, 20); while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); @@ -135,7 +135,7 @@ TEST(ConcurrencyControl, GrantReleasedToTheSameAllocation) { ConcurrencyControlTest t(3); auto allocation = t.cc.allocate(0, 10); - std::list acquired; + std::list acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 3); // 0 1 2 @@ -183,7 +183,7 @@ TEST(ConcurrencyControl, SetSlotCount) { 
ConcurrencyControlTest t(10); auto allocation = t.cc.allocate(5, 30); - std::vector acquired; + std::vector acquired; while (auto slot = allocation->tryAcquire()) acquired.emplace_back(std::move(slot)); ASSERT_TRUE(acquired.size() == 10); @@ -200,7 +200,7 @@ TEST(ConcurrencyControl, SetSlotCount) ASSERT_TRUE(acquired.size() == 5); // Check that newly added slots are equally distributed over waiting allocations - std::vector acquired2; + std::vector acquired2; auto allocation2 = t.cc.allocate(0, 30); ASSERT_TRUE(!allocation->tryAcquire()); t.cc.setMaxConcurrency(15); // 10 slots added: 5 to the first allocation and 5 to the second one @@ -224,7 +224,7 @@ TEST(ConcurrencyControl, MultipleThreads) auto run_query = [&] (size_t max_threads) { - ConcurrencyControl::AllocationPtr slots = t.cc.allocate(1, max_threads); + SlotAllocationPtr slots = t.cc.allocate(1, max_threads); std::mutex threads_mutex; std::vector threads; threads.reserve(max_threads); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 580aaa2b259..a06bacd7d3b 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -138,8 +138,8 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) initializeExecution(1, true); // Acquire slot until we are done - single_thread_slot = slots->tryAcquire(); - chassert(single_thread_slot && "Unable to allocate slot for the first thread, but we just allocated at least one slot"); + single_thread_cpu_slot = cpu_slots->tryAcquire(); + chassert(single_thread_cpu_slot && "Unable to allocate cpu slot for the first thread, but we just allocated at least one slot"); if (yield_flag && *yield_flag) return true; @@ -155,7 +155,7 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) if (node->exception) std::rethrow_exception(node->exception); - single_thread_slot.reset(); + single_thread_cpu_slot.reset(); finalizeExecution(); return false; @@ -333,8 +333,8 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ /// Allocate CPU slots from concurrency control size_t min_threads = concurrency_control ? 
1uz : num_threads; - slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); - use_threads = slots->grantedCount(); + cpu_slots = ConcurrencyControl::instance().allocate(min_threads, num_threads); + use_threads = cpu_slots->grantedCount(); Queue queue; graph->initializeExecution(queue); @@ -348,7 +348,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_ void PipelineExecutor::spawnThreads() { - while (auto slot = slots->tryAcquire()) + while (auto slot = cpu_slots->tryAcquire()) { size_t thread_num = threads.fetch_add(1); @@ -405,7 +405,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) } else { - auto slot = slots->tryAcquire(); + auto slot = cpu_slots->tryAcquire(); executeSingleThread(0); } diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 862a460f0ed..cb74b524163 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -68,8 +68,8 @@ private: ExecutorTasks tasks; /// Concurrency control related - ConcurrencyControl::AllocationPtr slots; - ConcurrencyControl::SlotPtr single_thread_slot; // slot for single-thread mode to work using executeStep() + SlotAllocationPtr cpu_slots; + AcquiredSlotPtr single_thread_cpu_slot; // cpu slot for single-thread mode to work using executeStep() std::unique_ptr pool; std::atomic_size_t threads = 0; From 151ade2318f38adc5b732423a1ee1d228e1e5966 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Sun, 28 Jan 2024 21:38:21 +0100 Subject: [PATCH 010/145] Update src/Common/ISlotControl.h --- src/Common/ISlotControl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ISlotControl.h b/src/Common/ISlotControl.h index add19f0cc0c..aa7414d5465 100644 --- a/src/Common/ISlotControl.h +++ b/src/Common/ISlotControl.h @@ -35,7 +35,7 @@ using SlotCount = UInt64; /// Unlimited number of slots constexpr SlotCount UnlimitedSlots = std::numeric_limits::max(); -/// Acquired slot holder. Slot is considered to be acquired as long the object exists. +/// Acquired slot holder. Slot is considered to be acquired as long as the object exists. 
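The call pattern these slot interfaces are designed for, as used by PipelineExecutor above, is roughly the following. A sketch only: run_worker() is a hypothetical placeholder for spawning a thread that owns the slot, and error handling is omitted.

#include <Common/ConcurrencyControl.h>
#include <cstddef>
#include <utility>

using namespace DB;

void run_worker(AcquiredSlotPtr slot); /// assumed helper: runs a thread that keeps the slot alive

void runWithCpuSlots(size_t min_threads, size_t max_threads)
{
    /// Ask for between min_threads and max_threads CPU slots; the allocation also
    /// acts as a subscription for slots that become free later.
    SlotAllocationPtr slots = ConcurrencyControl::instance().allocate(min_threads, max_threads);

    /// Spawn one worker per slot that is currently granted.
    while (AcquiredSlotPtr slot = slots->tryAcquire())
        run_worker(std::move(slot));

    /// Each slot is returned to the pool when the worker finishes and the
    /// AcquiredSlotPtr it holds is destroyed.
}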
class IAcquiredSlot : public std::enable_shared_from_this, boost::noncopyable { public: From 1ab29bef622a8de3af7bec194598e3939c9f2d7a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 29 Jan 2024 15:33:09 +0000 Subject: [PATCH 011/145] fixes --- src/Functions/FunctionBinaryArithmetic.h | 54 ++++++++++++++----- src/Functions/IsOperation.h | 6 +-- .../00700_decimal_arithm.reference | 14 ++--- .../01717_int_div_float_too_large_ubsan.sql | 4 +- .../02975_intdiv_with_decimal.reference | 52 +++++++++++------- .../0_stateless/02975_intdiv_with_decimal.sql | 16 ++++++ 6 files changed, 101 insertions(+), 45 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index e34514d15fd..831c1cf3aeb 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -153,17 +153,18 @@ public: using ResultDataType = Switch< /// Result must be Integer Case< - only_integer && IsDataTypeDecimal && IsDataTypeDecimal, + only_integer && (IsDataTypeDecimal || IsDataTypeDecimal), Switch< - Case || std::is_same_v, DataTypeInt256>, - Case || std::is_same_v, DataTypeInt128>, - Case || std::is_same_v, DataTypeInt64>, - Case || std::is_same_v, DataTypeInt32>>>, - Case< - only_integer, - Switch< - Case, LeftDataType>, - Case, RightDataType>>>, + Case< + IsDataTypeDecimal || IsDataTypeDecimal, + Switch< + Case, LeftDataType>, + Case, RightDataType>, + Case || std::is_same_v, DataTypeInt256>, + Case || std::is_same_v, DataTypeInt128>, + Case || std::is_same_v, DataTypeInt64>, + Case || std::is_same_v, DataTypeInt32>>>>>, + /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, Case< @@ -1713,12 +1714,37 @@ public: type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } } - else if constexpr ((IsDataTypeDecimal && IsFloatingPoint) || - (IsDataTypeDecimal && IsFloatingPoint)) - type_res = std::make_shared(); + else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || + (IsDataTypeDecimal && IsFloatingPoint)) && !(is_div_int || is_div_int_or_zero)) + { + if constexpr ((is_div_int || is_div_int_or_zero) && IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else if constexpr (is_div_int || is_div_int_or_zero) + { + if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v) + type_res = std::make_shared(); + else if constexpr (std::is_same_v || std::is_same_v) + type_res = std::make_shared(); + else + type_res = std::make_shared(); + } + else + type_res = std::make_shared(); + } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { diff --git a/src/Functions/IsOperation.h b/src/Functions/IsOperation.h index b36530591ef..b2c7a27d375 100644 --- a/src/Functions/IsOperation.h +++ b/src/Functions/IsOperation.h @@ -61,10 +61,8 @@ struct IsOperation static constexpr bool bit_hamming_distance = IsSameOperation::value; static constexpr bool division = div_floating || div_int || div_int_or_zero || modulo; - - static constexpr bool division_allow_decimal = div_floating || modulo; - - static constexpr bool allow_decimal = plus 
|| minus || multiply || division_allow_decimal || least || greatest; + // NOTE: allow_decimal should not fully contain `division` because of divInt + static constexpr bool allow_decimal = plus || minus || multiply || division || least || greatest; }; } diff --git a/tests/queries/0_stateless/00700_decimal_arithm.reference b/tests/queries/0_stateless/00700_decimal_arithm.reference index 811946c87e0..20f04696b1b 100644 --- a/tests/queries/0_stateless/00700_decimal_arithm.reference +++ b/tests/queries/0_stateless/00700_decimal_arithm.reference @@ -10,18 +10,18 @@ 63 21 -42 882 -882 2 0 2 0 63 21 -42 882 -882 2 0 2 0 1.00305798474369219219752355409390731264 -0.16305798474369219219752355409390731264 1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 1.38847100762815390390123822295304634368 0.02 0.005 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2.02 0.505 -63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2.02 0.5 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.505 2 0 +63.42 21.42 -41.58 890.82 -890.82 2.02 0.5 2 0 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 1.00305798474369219219752355409390731264 0.16305798474369219219752355409390731264 -1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 1.38847100762815390390123822295304634368 -0.00000000000000000000000000000000000001 0.00000000000000000000000000000000000001 -63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0.495 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0 2 63.42 -21.42 41.58 890.82 -890.82 -63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0.495049504950495049 1.980198019801980198 -63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0.49 1.98 +63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0 2 +63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0 2 -42 42 42 42 0.42 0.42 0.42 42.42 42.42 42.42 0 0 0 0 0 0 0 0 0 0 42 -42 -42 -42 -0.42 -0.42 -0.42 -42.42 -42.42 -42.42 diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql index c4f26a079f0..dc1e5b37050 100644 --- a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql +++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql @@ -1,2 +1,2 @@ -SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 } -SELECT intDiv(9223372036854775807, 1.); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 0.9998999834060669); -- { serverError 153 } +SELECT intDiv(18446744073709551615, 1.); -- { serverError 153 } diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference index 9c1faab21d7..594dcee975a 100644 --- a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -24,28 +24,44 @@ 2 2 2 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 2 2 2 2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 +1 +1 +1 +1 2 2 2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql index 8fc4b5a9a7d..18e657caa8a 100644 --- 
a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -25,6 +25,14 @@ SELECT intDiv(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); SELECT intDiv(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); SELECT intDiv(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); SELECT intDiv(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDiv(4.2, toDecimal32(2.2, 2)); +SELECT intDiv(4.2, toDecimal64(2.2, 2)); +SELECT intDiv(4.2, toDecimal128(2.2, 2)); +SELECT intDiv(4.2, toDecimal256(2.2, 2)); +SELECT intDiv(toDecimal32(4.4, 2), 2.2); +SELECT intDiv(toDecimal64(4.4, 2), 2.2); +SELECT intDiv(toDecimal128(4.4, 2), 2.2); +SELECT intDiv(toDecimal256(4.4, 2), 2.2); --intDivOrZero-- SELECT intDivOrZero(4,2); SELECT intDivOrZero(toDecimal32(4.4, 2), 2); @@ -52,3 +60,11 @@ SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal128(2.2, 2)); SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal256(2.2, 2)); SELECT intDivOrZero(toDecimal128(4.4, 2), toDecimal256(2.2, 2)); SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal256(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal32(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal64(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal128(2.2, 2)); +SELECT intDivOrZero(4.2, toDecimal256(2.2, 2)); +SELECT intDivOrZero(toDecimal32(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal64(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal128(4.4, 2), 2.2); +SELECT intDivOrZero(toDecimal256(4.4, 2), 2.2); From 7a1458c9227f47de485a06e6e473d059da381631 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 30 Jan 2024 15:21:58 +0000 Subject: [PATCH 012/145] Fix validating suspicious/experimental types in nested types --- .../parseColumnsListForTableFunction.cpp | 29 +++++++++++++++++-- .../02981_nested_bad_types.reference | 0 .../0_stateless/02981_nested_bad_types.sql | 27 +++++++++++++++++ 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02981_nested_bad_types.reference create mode 100644 tests/queries/0_stateless/02981_nested_bad_types.sql diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 551a883d093..fcdad7c93c1 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -48,8 +51,7 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings if (!settings.allow_suspicious_fixed_string_types) { - auto basic_type = removeLowCardinalityAndNullable(type); - if (const auto * fixed_string = typeid_cast(basic_type.get())) + if (const auto * fixed_string = typeid_cast(type.get())) { if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) throw Exception( @@ -71,6 +73,29 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings "Set setting allow_experimental_variant_type = 1 in order to allow it", type->getName()); } } + + if (const auto * nullable_type = typeid_cast(type.get())) + { + validateDataType(nullable_type->getNestedType(), settings); + } + else if (const auto * lc_type = typeid_cast(type.get())) + { + validateDataType(lc_type->getDictionaryType(), settings); + } + else if (const auto * array_type = typeid_cast(type.get())) + { + validateDataType(array_type->getNestedType(), settings); + } + else if (const auto * tuple_type = typeid_cast(type.get())) + { + for (const auto & element : tuple_type->getElements()) + 
validateDataType(element, settings); + } + else if (const auto * map_type = typeid_cast(type.get())) + { + validateDataType(map_type->getKeyType(), settings); + validateDataType(map_type->getValueType(), settings); + } } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) diff --git a/tests/queries/0_stateless/02981_nested_bad_types.reference b/tests/queries/0_stateless/02981_nested_bad_types.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02981_nested_bad_types.sql b/tests/queries/0_stateless/02981_nested_bad_types.sql new file mode 100644 index 00000000000..663d39cb1e2 --- /dev/null +++ b/tests/queries/0_stateless/02981_nested_bad_types.sql @@ -0,0 +1,27 @@ +set allow_suspicious_low_cardinality_types=0; +set allow_suspicious_fixed_string_types=0; +set allow_experimental_variant_type=0; + +select [42]::Array(LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select [[[42]]]::Array(Array(Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', 42)::Map(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select map('a', map('b', [42]))::Map(String, Map(String, Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', 42)::Tuple(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [[[42]]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select map('a', 42)::Map(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', [42]))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', 42)::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} + +select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select [[[42]]]::Array(Array(Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select map('a', 42)::Map(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', [42]))::Map(String, Map(String, Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', 42)::Tuple(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} + From 0557cdb8a9def2e4c8df81d23cb526153ce023f8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 30 Jan 2024 15:31:04 +0000 Subject: [PATCH 013/145] fix due to review --- src/Functions/FunctionBinaryArithmetic.h | 40 ++++++++++-------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 831c1cf3aeb..62a50f5e0c2 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -156,14 +156,18 @@ 
public: only_integer && (IsDataTypeDecimal || IsDataTypeDecimal), Switch< Case< - IsDataTypeDecimal || IsDataTypeDecimal, + IsDataTypeDecimal, + Switch< + Case, DataTypeInt256>, + Case, DataTypeInt128>, + Case, DataTypeInt64>, + Case, DataTypeInt32>>>, + Case< + IsDataTypeDecimal, Switch< Case, LeftDataType>, - Case, RightDataType>, - Case || std::is_same_v, DataTypeInt256>, - Case || std::is_same_v, DataTypeInt128>, - Case || std::is_same_v, DataTypeInt64>, - Case || std::is_same_v, DataTypeInt32>>>>>, + Case, DataTypeInt64>, + Case, DataTypeInt32>>>>>, /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, @@ -1684,11 +1688,11 @@ public: { if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); @@ -1723,18 +1727,14 @@ public: type_res = std::make_shared(); else if constexpr (std::is_same_v) type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + else if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); } else if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v || std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); @@ -1744,9 +1744,7 @@ public: } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) - type_res = std::make_shared(); - else if constexpr (is_div_int || is_div_int_or_zero) + if constexpr (is_div_int || is_div_int_or_zero) { if constexpr (std::is_same_v) type_res = std::make_shared(); @@ -1766,11 +1764,7 @@ public: type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) + if constexpr (std::is_same_v) type_res = std::make_shared(); else type_res = std::make_shared(); From 998c56fc3d3602a1151c7e310863e12666e595e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 30 Jan 2024 17:36:34 +0100 Subject: [PATCH 014/145] Move code --- src/Compression/CompressionCodecT64.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 42c6a18aa77..3ddc56fe4f6 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -168,6 +168,7 @@ TypeIndex baseType(TypeIndex type_idx) return TypeIndex::Int16; case TypeIndex::Int32: case TypeIndex::Decimal32: + case TypeIndex::Date32: return TypeIndex::Int32; case TypeIndex::Int64: case TypeIndex::Decimal64: @@ -180,8 +181,6 @@ TypeIndex baseType(TypeIndex type_idx) case TypeIndex::Enum16: case TypeIndex::Date: return TypeIndex::UInt16; - case TypeIndex::Date32: - return TypeIndex::Int32; case TypeIndex::UInt32: case TypeIndex::DateTime: case TypeIndex::IPv4: From e2a66f8e6594fcb8c95f47a6f2670869c78a4a35 Mon Sep 17 00:00:00 2001 From: avogar 
Date: Tue, 30 Jan 2024 16:39:52 +0000 Subject: [PATCH 015/145] Fix tests --- tests/queries/0_stateless/02010_array_index_bad_cast.sql | 1 + .../0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02010_array_index_bad_cast.sql b/tests/queries/0_stateless/02010_array_index_bad_cast.sql index 19c58bb28a7..42a6556fc77 100644 --- a/tests/queries/0_stateless/02010_array_index_bad_cast.sql +++ b/tests/queries/0_stateless/02010_array_index_bad_cast.sql @@ -1,2 +1,3 @@ -- This query throws exception about uncomparable data types (but at least it does not introduce bad cast in code). +SET allow_suspicious_low_cardinality_types=1; SELECT has(materialize(CAST(['2021-07-14'] AS Array(LowCardinality(Nullable(DateTime))))), materialize('2021-07-14'::DateTime64(7))); -- { serverError 44 } diff --git a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 index 79a7c654f10..95bac76c591 100644 --- a/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 +++ b/tests/queries/0_stateless/02797_join_nested_lowcardinality_convert.sql.j2 @@ -1,4 +1,4 @@ - +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS test1__fuzz_36; DROP TABLE IF EXISTS test1__fuzz_38; From 0576aa2b7fd060c68f482f8205575bd904356ebe Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 30 Jan 2024 16:45:36 +0000 Subject: [PATCH 016/145] fix fuzzer --- src/Functions/FunctionBinaryArithmetic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 62a50f5e0c2..e31183573c3 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1719,7 +1719,7 @@ public: } } else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || - (IsDataTypeDecimal && IsFloatingPoint)) && !(is_div_int || is_div_int_or_zero)) + (IsDataTypeDecimal && IsFloatingPoint))) { if constexpr ((is_div_int || is_div_int_or_zero) && IsDataTypeDecimal) { @@ -1760,7 +1760,7 @@ public: } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegral) + if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) type_res = std::make_shared(); else if constexpr (is_div_int || is_div_int_or_zero) { From aaed83541517e23137425ed5ed7e978a4f89f168 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 31 Jan 2024 12:59:26 +0000 Subject: [PATCH 017/145] Validate variants, use new validation on create queries --- src/Interpreters/InterpreterCreateQuery.cpp | 64 ++----------------- .../parseColumnsListForTableFunction.cpp | 6 ++ .../0_stateless/02981_nested_bad_types.sql | 41 ++++++++++-- 3 files changed, 46 insertions(+), 65 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a87464eb5de..a4d93eb623b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -50,6 +50,7 @@ #include #include #include +#include #include @@ -910,66 +911,13 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat const auto & settings = getContext()->getSettingsRef(); - /// Check low cardinality types in creating table if it was not allowed in setting - if (!create.attach && !settings.allow_suspicious_low_cardinality_types && 
!create.is_materialized_view) + /// If it's not attach and not materialized view to existing table, + /// we need to validate data types (check for experimental or suspicious types). + if (!create.attach && !create.is_materialized_view) { + DataTypeValidationSettings validation_settings(settings); for (const auto & name_and_type_pair : properties.columns.getAllPhysical()) - { - if (const auto * current_type_ptr = typeid_cast(name_and_type_pair.type.get())) - { - if (!isStringOrFixedString(*removeNullable(current_type_ptr->getDictionaryType()))) - throw Exception(ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, - "Creating columns of type {} is prohibited by default " - "due to expected negative impact on performance. " - "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - current_type_ptr->getName()); - } - } - } - - if (!create.attach && !settings.allow_experimental_object_type) - { - for (const auto & [name, type] : properties.columns.getAllPhysical()) - { - if (type->hasDynamicSubcolumns()) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because experimental Object type is not allowed. " - "Set setting allow_experimental_object_type = 1 in order to allow it", - name, type->getName()); - } - } - } - if (!create.attach && !settings.allow_suspicious_fixed_string_types) - { - for (const auto & [name, type] : properties.columns.getAllPhysical()) - { - auto basic_type = removeLowCardinalityAndNullable(type); - if (const auto * fixed_string = typeid_cast(basic_type.get())) - { - if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because fixed string with size > {} is suspicious. " - "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", - name, type->getName(), MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); - } - } - } - if (!create.attach && !settings.allow_experimental_variant_type) - { - for (const auto & [name, type] : properties.columns.getAllPhysical()) - { - if (isVariant(type)) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Cannot create table with column '{}' which type is '{}' " - "because experimental Variant type is not allowed. 
" - "Set setting allow_experimental_variant_type = 1 in order to allow it", - name, type->getName()); - } - } + validateDataType(name_and_type_pair.type, validation_settings); } } diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index fcdad7c93c1..056674c4379 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -96,6 +97,11 @@ void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings validateDataType(map_type->getKeyType(), settings); validateDataType(map_type->getValueType(), settings); } + else if (const auto * variant_type = typeid_cast(type.get())) + { + for (const auto & variant : variant_type->getVariants()) + validateDataType(variant, settings); + } } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) diff --git a/tests/queries/0_stateless/02981_nested_bad_types.sql b/tests/queries/0_stateless/02981_nested_bad_types.sql index 663d39cb1e2..8c0d2308d8f 100644 --- a/tests/queries/0_stateless/02981_nested_bad_types.sql +++ b/tests/queries/0_stateless/02981_nested_bad_types.sql @@ -8,14 +8,34 @@ select map('a', 42)::Map(String, LowCardinality(UInt64)); -- {serverError SUSPIC select map('a', map('b', [42]))::Map(String, Map(String, Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} select tuple('a', 42)::Tuple(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select 42::Variant(String, LowCardinality(UInt64)) settings allow_experimental_variant_type=1; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +create table test (x Array(LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Array(Array(LowCardinality(UInt64)))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Map(String, LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Map(String, Map(String, LowCardinality(UInt64)))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Tuple(String, LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Tuple(String, Array(Map(String, LowCardinality(UInt64))))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + + + +select ['42']::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select ['42']::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select [[['42']]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select map('a', '42')::Map(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select map('a', map('b', ['42']))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select tuple('a', '42')::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', '42')])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} +select '42'::Variant(UInt64, FixedString(1000000)) settings allow_experimental_variant_type=1; -- {serverError ILLEGAL_COLUMN} + 
+ +create table test (x Array(FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Array(Array(FixedString(1000000)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Map(String, FixedString(1000000)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, FixedString(1000000))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} -select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} -select [42]::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} -select [[[42]]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} -select map('a', 42)::Map(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} -select map('a', map('b', [42]))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} -select tuple('a', 42)::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} -select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} @@ -25,3 +45,10 @@ select map('a', map('b', [42]))::Map(String, Map(String, Array(Variant(String, U select tuple('a', 42)::Tuple(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} +create table test (x Array(Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Array(Array(Variant(String, UInt64)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Map(String, Map(String, Variant(String, UInt64)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, Variant(String, UInt64))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} + From a91101a21f4d0af445fcc9346bdd65d1f1ac7258 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 31 Jan 2024 07:44:17 +0000 Subject: [PATCH 018/145] Working implementation --- src/Coordination/Changelog.cpp | 859 +++++++++++++++------ src/Coordination/Changelog.h | 139 +++- src/Coordination/CoordinationSettings.cpp | 5 + src/Coordination/CoordinationSettings.h | 5 +- src/Coordination/FourLetterCommand.cpp | 6 + src/Coordination/Keeper4LWInfo.h | 14 +- src/Coordination/KeeperLogStore.cpp | 12 + src/Coordination/KeeperLogStore.h | 6 +- src/Coordination/KeeperServer.cpp | 12 +- src/Coordination/KeeperSnapshotManager.cpp | 5 +- src/Coordination/KeeperStateMachine.cpp | 91 ++- 11 files changed, 850 insertions(+), 304 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 752251a3838..1d7aa62b1d1 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,6 +1,11 @@ +#include +#include #include #include +#include #include +#include +#include #include #include #include @@ -15,7 +20,10 @@ 
#include #include #include +#include #include +#include +#include namespace DB @@ -188,9 +196,9 @@ public: } auto latest_log_disk = getLatestLogDisk(); - assert(file_description->disk == latest_log_disk); + chassert(file_description->disk == latest_log_disk); file_buf = latest_log_disk->writeFile(file_description->path, DBMS_DEFAULT_BUFFER_SIZE, mode); - assert(file_buf); + chassert(file_buf); last_index_written.reset(); current_file_description = std::move(file_description); @@ -247,7 +255,7 @@ public: } auto & write_buffer = getBuffer(); - auto current_position = write_buffer.count(); + auto current_position = initial_file_size + write_buffer.count(); writeIntBinary(computeRecordChecksum(record), write_buffer); writeIntBinary(record.header.version, write_buffer); @@ -268,7 +276,11 @@ public: else { unflushed_indices_with_log_location.emplace_back( - record.header.index, LogLocation{.file_description = current_file_description, .position = current_position}); + record.header.index, + LogLocation{ + .file_description = current_file_description, + .position = current_position, + .size = record.header.blob_size}); } last_index_written = record.header.index; @@ -479,6 +491,9 @@ private: LoggerPtr const log; }; +namespace +{ + struct ChangelogReadResult { /// Total entries read from log including skipped. @@ -504,9 +519,6 @@ struct ChangelogReadResult bool error; }; -namespace -{ - ChangelogRecord readChangelogRecord(ReadBuffer & read_buf, const std::string & filepath) { /// Read checksum @@ -557,6 +569,11 @@ LogEntryPtr logEntryFromRecord(const ChangelogRecord & record) return nuraft::cs_new(record.header.term, record.blob, static_cast(record.header.value_type)); } +size_t logEntrySize(nuraft::log_entry & log_entry) +{ + return log_entry.get_buf().size(); +} + } class ChangelogReader @@ -585,7 +602,7 @@ public: /// Check for duplicated changelog ids if (entry_storage.contains(record.header.index)) - entry_storage.eraseIf([&record](const auto index) { return index >= record.header.index; }); + entry_storage.cleanAfter(record.header.index + 1); result.total_entries_read_from_log += 1; @@ -598,13 +615,14 @@ public: if (result.first_read_index == 0) result.first_read_index = record.header.index; - auto log_size = read_buf->count() - result.last_position; - /// Put it into in memory structure entry_storage.addEntryWithLocation( record.header.index, log_entry, - LogLocation{.file_description = changelog_description, .position = static_cast(result.last_position), .size = log_size}); + LogLocation{ + .file_description = changelog_description, + .position = static_cast(result.last_position), + .size = record.header.blob_size}); result.last_read_index = record.header.index; if (result.total_entries_read_from_log % 50000 == 0) @@ -636,28 +654,140 @@ private: std::unique_ptr read_buf; }; -LogEntryStorage::LogEntryStorage(const LogFileSettings & log_settings) +LogEntryStorage::LogEntryStorage(const LogFileSettings & log_settings, KeeperContextPtr keeper_context_) : latest_logs_cache(log_settings.latest_logs_cache_size_threshold) , commit_logs_cache(log_settings.commit_logs_cache_size_threshold) + , prefetch_queue(std::numeric_limits::max()) + , keeper_context(std::move(keeper_context_)) + , log(getLogger("Changelog")) { - + commit_logs_prefetcher = std::make_unique([this] { prefetchCommitLogs(); }); } -size_t LogEntryStorage::size() const +LogEntryStorage::~LogEntryStorage() { - return total_entries; + shutdown(); +} + +void LogEntryStorage::prefetchCommitLogs() +{ + std::shared_ptr prefetch_info; + while 
(prefetch_queue.pop(prefetch_info)) + { + if (prefetch_info->cancel) + { + prefetch_info->done = true; + prefetch_info->done.notify_all(); + continue; + } + + auto current_index = prefetch_info->commit_prefetch_index_range.first; + try + { + for (const auto & prefetch_file_info : prefetch_info->file_infos) + { + const auto & [changelog_description, position, count] = prefetch_file_info; + std::lock_guard file_lock(changelog_description->file_mutex); + auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); + file->seek(position, SEEK_SET); + LOG_TRACE(log, "Prefetching {} log entries from path {}, from position {}", count, changelog_description->path, position); + + for (size_t i = 0; i < count; ++i) + { + if (prefetch_info->cancel) + break; + + auto record = readChangelogRecord(*file, changelog_description->path); + auto entry = logEntryFromRecord(record); + if (current_index != record.header.index) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid index prefetched, expected {}, actual {}", current_index, record.header.index); + + commit_logs_cache.setPrefetchedEntry(record.header.index, std::move(entry), nullptr); + ++current_index; + } + + if (prefetch_info->cancel) + break; + } + } + catch (...) + { + tryLogCurrentException(log, "While prefetching log entries"); + auto exception = std::current_exception(); + + for (; current_index <= prefetch_info->commit_prefetch_index_range.second; ++current_index) + commit_logs_cache.setPrefetchedEntry(current_index, nullptr, exception); + } + + prefetch_info->done = true; + prefetch_info->done.notify_all(); + } +} + +void LogEntryStorage::startCommitLogsPrefetch(uint64_t last_committed_index) const +{ + if (keeper_context->isShutdownCalled()) + return; + + if (!commit_logs_cache.empty() && commit_logs_cache.max_index_in_cache != last_committed_index) + return; + + if (logs_location.empty()) + return; + + /// we are already prefetching some logs for commit + if (current_prefetch_info && !current_prefetch_info->done) + return; + + auto new_prefetch_info = std::make_shared(); + auto & [prefetch_from, prefetch_to] = new_prefetch_info->commit_prefetch_index_range; + size_t current_index = commit_logs_cache.cache.empty() ? 
last_committed_index + 1 : commit_logs_cache.max_index_in_cache + 1; + prefetch_from = current_index; + size_t total_size = 0; + std::vector file_infos; + FileReadInfo * current_file_info = nullptr; + for (; latest_logs_cache.empty() || current_index < latest_logs_cache.min_index_in_cache; ++current_index) + { + const auto & [changelog_description, position, size] = logs_location.at(current_index); + if (total_size == 0) + current_file_info = &file_infos.emplace_back(changelog_description, position, /* count */ 1); + else if (total_size + size > commit_logs_cache.size_threshold) + break; + else if (changelog_description == current_file_info->file_description) + ++current_file_info->count; + else + current_file_info = &file_infos.emplace_back(changelog_description, position, /* count */ 1); + + total_size += size; + commit_logs_cache.addPrefetchedEntry(current_index, size); + } + + if (!file_infos.empty()) + { + current_prefetch_info = std::move(new_prefetch_info); + prefetch_to = current_index - 1; + LOG_TRACE(log, "Will prefetch {} commit log entries [{} - {}]", prefetch_to - prefetch_from + 1, prefetch_from, prefetch_to); + + current_prefetch_info->file_infos = std::move(file_infos); + auto inserted = prefetch_queue.push(current_prefetch_info); + chassert(inserted); + } +} + +CacheEntry::CacheEntry(LogEntryPtr entry_) + : entry(std::move(entry_)) +{ + if (entry == nullptr) + is_prefetched = true; } LogEntryStorage::InMemoryCache::InMemoryCache(size_t size_threshold_) : size_threshold(size_threshold_) {} -void LogEntryStorage::InMemoryCache::addEntry(uint64_t index, const LogEntryPtr & log_entry) +void LogEntryStorage::InMemoryCache::updateStatsWithNewEntry(uint64_t index, size_t size) { - auto [_, inserted] = cache.emplace(index, log_entry); - if (!inserted) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); - cache_size += log_entry->get_buf_ptr()->size(); + cache_size += size; if (cache.size() == 1) { @@ -670,52 +800,157 @@ void LogEntryStorage::InMemoryCache::addEntry(uint64_t index, const LogEntryPtr } } -void LogEntryStorage::InMemoryCache::addEntry(IndexToLogEntryNode && node) +void LogEntryStorage::InMemoryCache::addEntry(uint64_t index, LogEntryPtr log_entry) +{ + auto entry_size = logEntrySize(*log_entry); + auto [_, inserted] = cache.emplace(index, std::move(log_entry)); + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); + updateStatsWithNewEntry(index, entry_size); +} + +void LogEntryStorage::InMemoryCache::addEntry(IndexToCacheEntryNode && node) { auto index = node.key(); - auto entry_size = node.mapped()->get_buf_ptr()->size(); + auto entry_size = logEntrySize(*node.mapped().entry); auto result = cache.insert(std::move(node)); if (!result.inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); - - cache_size += entry_size; - if (cache.size() == 1) - { - min_index_in_cache = index; - max_index_in_cache = index; - } - else - { - max_index_in_cache = index; - } + updateStatsWithNewEntry(index, entry_size); } -IndexToLogEntryNode LogEntryStorage::InMemoryCache::popOldestEntry() +void LogEntryStorage::InMemoryCache::addPrefetchedEntry(uint64_t index, size_t size) +{ + auto [_, inserted] = cache.emplace(index, nullptr); + if (!inserted) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in 
cache", index); + updateStatsWithNewEntry(index, size); +} + +void LogEntryStorage::InMemoryCache::setPrefetchedEntry(uint64_t index, LogEntryPtr log_entry, std::exception_ptr exception) +{ + auto it = cache.find(index); + if (it == cache.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing expected index {} in cache", index); + + { + std::lock_guard lock(it->second.entry_mutex); + if (exception) + it->second.exception = exception; + else + it->second.entry = std::move(log_entry); + } + it->second.is_prefetched = false; + it->second.entry_prefetched_cv.notify_all(); +} + +IndexToCacheEntryNode LogEntryStorage::InMemoryCache::popOldestEntry() { auto node = cache.extract(min_index_in_cache); if (node.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't find the oldest entry of index {} in logs cache", min_index_in_cache); ++min_index_in_cache; - cache_size -= node.mapped()->get_buf_ptr()->size(); + cache_size -= logEntrySize(*node.mapped().entry); return node; } +bool LogEntryStorage::InMemoryCache::containsEntry(uint64_t index) const +{ + return !cache.empty() && index >= min_index_in_cache && index <= max_index_in_cache; +} + LogEntryPtr LogEntryStorage::InMemoryCache::getEntry(uint64_t index) const { - if (index < min_index_in_cache || index > max_index_in_cache) + if (!containsEntry(index)) return nullptr; auto it = cache.find(index); if (it == cache.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Index {} missing from cache while it should be present", index); - return it->second; + const auto & cache_entry = it->second; + if (cache_entry.is_prefetched) + { + std::unique_lock lock(cache_entry.entry_mutex); + cache_entry.entry_prefetched_cv.wait(lock, [&]{ return cache_entry.entry != nullptr; }); + } + + if (cache_entry.exception) + std::rethrow_exception(cache_entry.exception); + + return cache_entry.entry; +} + +void LogEntryStorage::InMemoryCache::cleanUpTo(uint64_t index) +{ + if (index <= min_index_in_cache) + return; + + if (index > max_index_in_cache) + { + cache.clear(); + cache_size = 0; + } + else + { + for (size_t i = min_index_in_cache; i < index; ++i) + { + auto it = cache.find(i); + if (it == cache.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); + + cache_size -= logEntrySize(*it->second.entry); + cache.erase(it); + } + min_index_in_cache = index; + } +} + +void LogEntryStorage::InMemoryCache::cleanAfter(uint64_t index) +{ + if (index >= max_index_in_cache) + return; + + if (index < min_index_in_cache) + { + cache.clear(); + cache_size = 0; + } + else + { + for (size_t i = index + 1; i < max_index_in_cache; ++i) + { + auto it = cache.find(i); + if (it == cache.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); + + cache_size -= logEntrySize(*it->second.entry); + cache.erase(it); + } + max_index_in_cache = index; + } +} + +void LogEntryStorage::InMemoryCache::clear() +{ + cache.clear(); + cache_size = 0; +} + +bool LogEntryStorage::InMemoryCache::empty() const +{ + return cache.empty(); +} + +size_t LogEntryStorage::InMemoryCache::numberOfEntries() const +{ + return cache.size(); } bool LogEntryStorage::InMemoryCache::hasSpaceAvailable(size_t log_entry_size) const { - return cache.empty() || cache_size + log_entry_size < size_threshold; + return size_threshold == 0 || empty() || cache_size + log_entry_size < size_threshold; } void LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) @@ -723,68 +958,167 @@ 
void LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) /// we update the cache for added entries on refreshCache call latest_logs_cache.addEntry(index, log_entry); - ++total_entries; + if (log_entry->get_val_type() == nuraft::conf) + { + latest_config = log_entry; + latest_config_index = index; + conf_logs_indices.insert(index); + } + + if (first_log_entry == nullptr) + { + first_log_index = index; + first_log_entry = log_entry; + } +} + +bool LogEntryStorage::shouldMoveLogToCommitCache(uint64_t index, size_t log_entry_size) +{ + /// if commit logs cache is empty, we need it only if it's the next log to commit + if (commit_logs_cache.empty()) + return keeper_context->lastCommittedIndex() + 1 == index; + + return commit_logs_cache.max_index_in_cache == index - 1 && commit_logs_cache.hasSpaceAvailable(log_entry_size); } void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location) { - auto entry_size = log_entry->get_buf_ptr()->size(); + auto entry_size = logEntrySize(*log_entry); while (!latest_logs_cache.hasSpaceAvailable(entry_size)) { auto entry_handle = latest_logs_cache.popOldestEntry(); - if (commit_logs_cache.max_index_in_cache == entry_handle.key() - 1 && commit_logs_cache.hasSpaceAvailable(entry_handle.mapped()->get_buf_ptr()->size())) + size_t removed_entry_size = logEntrySize(*entry_handle.mapped().entry); + if (shouldMoveLogToCommitCache(entry_handle.key(), removed_entry_size)) commit_logs_cache.addEntry(std::move(entry_handle)); } latest_logs_cache.addEntry(index, log_entry); + logs_location.emplace(index, std::move(log_location)); + + if (log_entry->get_val_type() == nuraft::conf) + { + latest_config = log_entry; + latest_config_index = index; + conf_logs_indices.insert(index); + } } -void LogEntryStorage::eraseIf(std::function index_predicate) +void LogEntryStorage::cleanUpTo(uint64_t index) { - //std::erase_if(logs_cache, [&](const auto & item) { return index_predicate(item.first); }); + latest_logs_cache.cleanUpTo(index); + /// uncommitted logs should never be compacted so we don't have to handle + /// logs that are currently being prefetched + commit_logs_cache.cleanUpTo(index); + std::erase_if(logs_location, [&](const auto & item) { return item.first < index; }); + std::erase_if(conf_logs_indices, [&](const auto conf_index) { return conf_index < index; }); + if (auto it = std::max_element(conf_logs_indices.begin(), conf_logs_indices.end()); it != conf_logs_indices.end()) + { + latest_config_index = *it; + latest_config = getEntry(latest_config_index); + } + else + latest_config = nullptr; + +} + +void LogEntryStorage::cleanAfter(uint64_t index) +{ + latest_logs_cache.cleanAfter(index); + /// if we cleared all latest logs, there is a possibility we would need to clear commit logs + if (latest_logs_cache.empty()) + { + commit_logs_cache.getEntry(index); + if (current_prefetch_info && !current_prefetch_info->done) + { + auto [prefetch_from, prefetch_to] = current_prefetch_info->commit_prefetch_index_range; + if (index >= prefetch_from && index <= prefetch_to) + { + current_prefetch_info->cancel = true; + current_prefetch_info->done.wait(false); + } + } + + commit_logs_cache.cleanAfter(index); + startCommitLogsPrefetch(keeper_context->lastCommittedIndex()); + } + + std::erase_if(logs_location, [&](const auto & item) { return item.first > index; }); + if (!logs_location.empty()) + max_index_with_location = index; + else if (latest_logs_cache.empty()) + /// if we don't store any logs, reset first log 
cache + first_log_entry = nullptr; + + std::erase_if(conf_logs_indices, [&](const auto conf_index) { return conf_index > index; }); + if (auto it = std::max_element(conf_logs_indices.begin(), conf_logs_indices.end()); it != conf_logs_indices.end()) + { + latest_config_index = *it; + latest_config = getEntry(latest_config_index); + } + else + latest_config = nullptr; } bool LogEntryStorage::contains(uint64_t index) const { - return logs_cache.contains(index); + return logs_location.contains(index) || latest_logs_cache.containsEntry(index); } LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const { - if (index >= min_index_in_cache) - return logs_cache.at(index); + auto last_committed_index = keeper_context->lastCommittedIndex(); + commit_logs_cache.cleanUpTo(last_committed_index); + startCommitLogsPrefetch(last_committed_index); - std::lock_guard lock(logs_location_mutex); + LogEntryPtr entry = nullptr; - if (auto it = logs_location.find(index); it != logs_location.end()) + if (latest_config != nullptr && index == latest_config_index) { - const auto & [changelog_description, position] = it->second; + entry = latest_config; + } + else if (first_log_entry != nullptr && index == first_log_index) + { + entry = first_log_entry; + } + else if (auto entry_from_latest_cache = latest_logs_cache.getEntry(index)) + { + entry = std::move(entry_from_latest_cache); + } + else if (auto entry_from_commit_cache = commit_logs_cache.getEntry(index)) + { + entry = std::move(entry_from_commit_cache); + } + else if (auto it = logs_location.find(index); it != logs_location.end()) + { + const auto & [changelog_description, position, size] = it->second; std::lock_guard file_lock(changelog_description->file_mutex); - //std::cout << "Reading from path " << changelog_description->path << std::endl; - auto file = changelog_description->disk->readFile(changelog_description->path); + auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); file->seek(position, SEEK_SET); + LOG_TRACE(log, "Reading log entry at index {} from path {}, position {}, size {}", index, changelog_description->path, position, size); auto record = readChangelogRecord(*file, changelog_description->path); - return logEntryFromRecord(record); + entry = logEntryFromRecord(record); } - else - std::cout << "Nothing found" << std::endl; - - return nullptr; + return entry; } void LogEntryStorage::clear() { - logs_cache.clear(); + latest_logs_cache.clear(); + commit_logs_cache.clear(); + logs_location.clear(); } LogEntryPtr LogEntryStorage::getLatestConfigChange() const { - for (const auto & [_, entry] : latest_logs_cache.logs_cache) - if (entry->get_val_type() == nuraft::conf) - return entry; - return nullptr; + return latest_config; +} + +void LogEntryStorage::cacheFirstLog(uint64_t first_index) +{ + first_log_entry = getEntry(first_index); + first_log_index = first_index; } void LogEntryStorage::addLogLocations(std::vector> indices_with_log_locations) @@ -798,97 +1132,131 @@ void LogEntryStorage::addLogLocations(std::vector new_unapplied_indices_with_log_locations; + { + std::lock_guard lock(logs_location_mutex); + new_unapplied_indices_with_log_locations.swap(unapplied_indices_with_log_locations); + } + + for (auto & [index, log_location] : new_unapplied_indices_with_log_locations) { logs_location.emplace(index, std::move(log_location)); max_index_with_location = index; } - for (size_t index = min_index_in_cache; index < max_index_to_remove; ++index) + while (latest_logs_cache.numberOfEntries() > 1 && 
latest_logs_cache.min_index_in_cache <= max_index_with_location + && latest_logs_cache.cache_size > latest_logs_cache.size_threshold) { - if (index <= max_index_with_location) - { - logs_cache.erase(index); - min_index_in_cache = index + 1; - } + auto node = latest_logs_cache.popOldestEntry(); + if (shouldMoveLogToCommitCache(node.key(), logEntrySize(*node.mapped().entry))) + commit_logs_cache.addEntry(std::move(node)); } - - unapplied_indices_with_log_locations.clear(); } LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end) const { LogEntriesPtr ret = nuraft::cs_new>>(); ret->reserve(end - start); - + /// we rely on fact that changelogs need to be written sequentially with /// no other writes between - struct ReadInfo + std::optional read_info; + const auto set_new_file = [&](const auto & log_location) { - ChangelogFileDescriptionPtr file_description; - size_t start_position = 0; - size_t count = 0; + read_info.emplace(); + read_info->file_description = log_location.file_description; + read_info->position = log_location.position; + read_info->count = 1; }; - /// we have to collect some logs from disks because they are not cached - if (start < min_index_in_cache) + const auto flush_file = [&] { - //std::cout << "Reading some from disk" << std::endl; - std::lock_guard logs_location_lock(logs_location_mutex); - std::vector read_infos; - for (uint64_t i = start; i < min_index_in_cache && i < end; ++i) + if (!read_info) + return; + + const auto & [file_description, start_position, count] = *read_info; + LOG_TRACE(log, "Reading from path {} {} entries", file_description->path, count); + std::lock_guard file_lock(file_description->file_mutex); + auto file = file_description->disk->readFile(file_description->path); + file->seek(start_position, SEEK_SET); + + for (size_t i = 0; i < count; ++i) + { + auto record = readChangelogRecord(*file, file_description->path); + ret->push_back(logEntryFromRecord(record)); + } + + read_info.reset(); + }; + + for (size_t i = start; i < end; ++i) + { + if (auto commit_cache_entry = commit_logs_cache.getEntry(i)) + { + flush_file(); + ret->push_back(std::move(commit_cache_entry)); + } + else if (auto latest_cache_entry = latest_logs_cache.getEntry(i)) + { + flush_file(); + ret->push_back(std::move(latest_cache_entry)); + } + else { const auto & log_location = logs_location.at(i); - const auto push_new_file = [&] - { - read_infos.push_back(ReadInfo - { - .file_description = log_location.file_description, - .start_position = log_location.position, - .count = 1, - }); - }; - if (read_infos.empty()) - push_new_file(); - else if (auto & last = read_infos.back(); log_location.file_description == last.file_description) - ++last.count; + if (!read_info) + set_new_file(log_location); + else if (read_info->file_description == log_location.file_description) + ++read_info->count; else - push_new_file(); - } - - for (const auto & [file_description, start_position, count] : read_infos) - { - std::cout << "Reading from path " << file_description->path << " " << count << " entries" << std::endl; - std::lock_guard file_lock(file_description->file_mutex); - auto file = file_description->disk->readFile(file_description->path); - file->seek(start_position, SEEK_SET); - - for (size_t i = 0; i < count; ++i) { - auto record = readChangelogRecord(*file, file_description->path); - ret->push_back(logEntryFromRecord(record)); + flush_file(); + set_new_file(log_location); } } - - start = min_index_in_cache; } - else - std::cout << "Nothing read from disk" << 
std::endl; - - for (uint64_t i = start; i < end; ++i) - ret->push_back(logs_cache.at(i)); + flush_file(); return ret; +} +void LogEntryStorage::getKeeperLogInfo(KeeperLogInfo & log_info) const +{ + log_info.latest_logs_cache_entries = latest_logs_cache.numberOfEntries(); + log_info.latest_logs_cache_size = latest_logs_cache.cache_size; + + log_info.commit_logs_cache_entries = commit_logs_cache.numberOfEntries(); + log_info.commit_logs_cache_size = commit_logs_cache.cache_size; +} + +bool LogEntryStorage::isConfLog(uint64_t index) const +{ + return conf_logs_indices.contains(index); +} + +void LogEntryStorage::shutdown() +{ + if (std::exchange(is_shutdown, true)) + return; + + if (!prefetch_queue.isFinished()) + prefetch_queue.finish(); + + if (current_prefetch_info) + { + current_prefetch_info->cancel = true; + current_prefetch_info->done.wait(false); + } + + if (commit_logs_prefetcher->joinable()) + commit_logs_prefetcher->join(); } Changelog::Changelog( @@ -897,117 +1265,125 @@ Changelog::Changelog( , rotate_interval(log_file_settings.rotate_interval) , compress_logs(log_file_settings.compress_logs) , log(log_) - , entry_storage(log_file_settings) + , entry_storage(log_file_settings, keeper_context_) , write_operations(std::numeric_limits::max()) , append_completion_queue(std::numeric_limits::max()) , keeper_context(std::move(keeper_context_)) , flush_settings(flush_settings_) { - if (auto latest_log_disk = getLatestLogDisk(); - log_file_settings.force_sync && dynamic_cast(latest_log_disk.get()) == nullptr) + try { - throw DB::Exception( - DB::ErrorCodes::BAD_ARGUMENTS, - "force_sync is set to true for logs but disk '{}' cannot satisfy such guarantee because it's not of type DiskLocal.\n" - "If you want to use force_sync and same disk for all logs, please set keeper_server.log_storage_disk to a local disk.\n" - "If you want to use force_sync and different disk only for old logs, please set 'keeper_server.log_storage_disk' to any " - "supported disk and 'keeper_server.latest_log_storage_disk' to a local disk.\n" - "Otherwise, disable force_sync", - latest_log_disk->getName()); - } - - /// Load all files on changelog disks - - std::unordered_set read_disks; - - const auto load_from_disk = [&](const auto & disk) - { - if (read_disks.contains(disk)) - return; - - LOG_TRACE(log, "Reading from disk {}", disk->getName()); - std::unordered_map incomplete_files; - - const auto clean_incomplete_file = [&](const auto & file_path) + if (auto latest_log_disk = getLatestLogDisk(); + log_file_settings.force_sync && dynamic_cast(latest_log_disk.get()) == nullptr) { - if (auto incomplete_it = incomplete_files.find(fs::path(file_path).filename()); incomplete_it != incomplete_files.end()) + throw DB::Exception( + DB::ErrorCodes::BAD_ARGUMENTS, + "force_sync is set to true for logs but disk '{}' cannot satisfy such guarantee because it's not of type DiskLocal.\n" + "If you want to use force_sync and same disk for all logs, please set keeper_server.log_storage_disk to a local disk.\n" + "If you want to use force_sync and different disk only for old logs, please set 'keeper_server.log_storage_disk' to any " + "supported disk and 'keeper_server.latest_log_storage_disk' to a local disk.\n" + "Otherwise, disable force_sync", + latest_log_disk->getName()); + } + + /// Load all files on changelog disks + + std::unordered_set read_disks; + + const auto load_from_disk = [&](const auto & disk) + { + if (read_disks.contains(disk)) + return; + + LOG_TRACE(log, "Reading from disk {}", disk->getName()); + 
std::unordered_map incomplete_files; + + const auto clean_incomplete_file = [&](const auto & file_path) { - LOG_TRACE(log, "Removing {} from {}", file_path, disk->getName()); - disk->removeFile(file_path); - disk->removeFile(incomplete_it->second); - incomplete_files.erase(incomplete_it); - return true; + if (auto incomplete_it = incomplete_files.find(fs::path(file_path).filename()); incomplete_it != incomplete_files.end()) + { + LOG_TRACE(log, "Removing {} from {}", file_path, disk->getName()); + disk->removeFile(file_path); + disk->removeFile(incomplete_it->second); + incomplete_files.erase(incomplete_it); + return true; + } + + return false; + }; + + std::vector changelog_files; + for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) + { + const auto & file_name = it->name(); + if (file_name == changelogs_detached_dir) + continue; + + if (file_name.starts_with(tmp_prefix)) + { + incomplete_files.emplace(file_name.substr(tmp_prefix.size()), it->path()); + continue; + } + + if (file_name.starts_with(DEFAULT_PREFIX)) + { + if (!clean_incomplete_file(it->path())) + changelog_files.push_back(it->path()); + } + else + { + LOG_WARNING(log, "Unknown file found in log directory: {}", file_name); + } } - return false; + for (const auto & changelog_file : changelog_files) + { + if (clean_incomplete_file(fs::path(changelog_file).filename())) + continue; + + auto file_description = getChangelogFileDescription(changelog_file); + file_description->disk = disk; + + LOG_TRACE(log, "Found {} on {}", changelog_file, disk->getName()); + auto [changelog_it, inserted] = existing_changelogs.insert_or_assign(file_description->from_log_index, std::move(file_description)); + + if (!inserted) + LOG_WARNING(log, "Found duplicate entries for {}, will use the entry from {}", changelog_it->second->path, disk->getName()); + } + + for (const auto & [name, path] : incomplete_files) + disk->removeFile(path); + + read_disks.insert(disk); }; - std::vector changelog_files; - for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) - { - const auto & file_name = it->name(); - if (file_name == changelogs_detached_dir) - continue; + /// Load all files from old disks + for (const auto & disk : keeper_context->getOldLogDisks()) + load_from_disk(disk); - if (file_name.starts_with(tmp_prefix)) - { - incomplete_files.emplace(file_name.substr(tmp_prefix.size()), it->path()); - continue; - } - - if (file_name.starts_with(DEFAULT_PREFIX)) - { - if (!clean_incomplete_file(it->path())) - changelog_files.push_back(it->path()); - } - else - { - LOG_WARNING(log, "Unknown file found in log directory: {}", file_name); - } - } - - for (const auto & changelog_file : changelog_files) - { - if (clean_incomplete_file(fs::path(changelog_file).filename())) - continue; - - auto file_description = getChangelogFileDescription(changelog_file); - file_description->disk = disk; - - LOG_TRACE(log, "Found {} on {}", changelog_file, disk->getName()); - auto [changelog_it, inserted] = existing_changelogs.insert_or_assign(file_description->from_log_index, std::move(file_description)); - - if (!inserted) - LOG_WARNING(log, "Found duplicate entries for {}, will use the entry from {}", changelog_it->second->path, disk->getName()); - } - - for (const auto & [name, path] : incomplete_files) - disk->removeFile(path); - - read_disks.insert(disk); - }; - - /// Load all files from old disks - for (const auto & disk : keeper_context->getOldLogDisks()) + auto disk = getDisk(); load_from_disk(disk); - auto disk = getDisk(); - 
load_from_disk(disk); + auto latest_log_disk = getLatestLogDisk(); + if (disk != latest_log_disk) + load_from_disk(latest_log_disk); - auto latest_log_disk = getLatestLogDisk(); - if (disk != latest_log_disk) - load_from_disk(latest_log_disk); + if (existing_changelogs.empty()) + LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", disk->getPath()); - if (existing_changelogs.empty()) - LOG_WARNING(log, "No logs exists in {}. It's Ok if it's the first run of clickhouse-keeper.", disk->getPath()); + clean_log_thread = std::make_unique([this] { cleanLogThread(); }); - clean_log_thread = ThreadFromGlobalPool([this] { cleanLogThread(); }); + write_thread = std::make_unique([this] { writeThread(); }); - write_thread = ThreadFromGlobalPool([this] { writeThread(); }); + append_completion_thread = std::make_unique([this] { appendCompletionThread(); }); - append_completion_thread = ThreadFromGlobalPool([this] { appendCompletionThread(); }); - - current_writer = std::make_unique(existing_changelogs, entry_storage, keeper_context, log_file_settings); + current_writer = std::make_unique(existing_changelogs, entry_storage, keeper_context, log_file_settings); + } + catch (...) + { + tryLogCurrentException(log); + throw; + } } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) @@ -1162,13 +1538,13 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin remove_invalid_logs(); description->disk->removeFile(description->path); existing_changelogs.erase(last_log_read_result->log_start_index); - entry_storage.eraseIf([last_log_read_result](const auto index) { return index >= last_log_read_result->log_start_index; }); + entry_storage.cleanAfter(last_log_read_result->log_start_index - 1); } else if (last_log_read_result->error) { LOG_INFO(log, "Chagelog {} read finished with error but some logs were read from it, file will not be removed", description->path); remove_invalid_logs(); - entry_storage.eraseIf([last_log_read_result](const auto index) { return index > last_log_read_result->last_read_index; }); + entry_storage.cleanAfter(last_log_read_result->log_start_index); move_from_latest_logs_disks(existing_changelogs.at(last_log_read_result->log_start_index)); } /// don't mix compressed and uncompressed writes @@ -1203,6 +1579,8 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin moveFileBetweenDisks(description->disk, description, disk, description->path); } + if (size() != 0) + entry_storage.cacheFirstLog(min_log_id); initialized = true; } @@ -1306,7 +1684,7 @@ void Changelog::removeAllLogsAfter(uint64_t remove_after_log_start_index) LOG_WARNING(log, "Removing changelogs that go after broken changelog entry"); removeExistingLogs(start_to_remove_from_itr, existing_changelogs.end()); - entry_storage.eraseIf([start_to_remove_from_log_id](const auto index) { return index >= start_to_remove_from_log_id; }); + entry_storage.cleanAfter(start_to_remove_from_log_id - 1); } void Changelog::removeAllLogs() @@ -1457,10 +1835,13 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) if (!initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); - if (min_log_id == 0) - min_log_id = index; - entry_storage.addEntry(index, log_entry); + if (min_log_id == 0) + { + min_log_id = index; + entry_storage.cacheFirstLog(index); + } + max_log_id = index; if (!write_operations.push(AppendLog{index, 
log_entry})) @@ -1507,7 +1888,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) /// Remove redundant logs from memory /// Everything >= index must be removed - entry_storage.eraseIf([index](const auto current_index) { return current_index >= index; }); + entry_storage.cleanAfter(index - 1); /// Now we can actually override entry at index appendEntry(index, log_entry); @@ -1576,8 +1957,9 @@ void Changelog::compact(uint64_t up_to_log_index) } /// Compaction from the past is possible, so don't make our min_log_id smaller. min_log_id = std::max(min_log_id, up_to_log_index + 1); + entry_storage.cacheFirstLog(min_log_id); - entry_storage.eraseIf([up_to_log_index](const auto index) { return index <= up_to_log_index; }); + entry_storage.cleanUpTo(up_to_log_index + 1); if (need_rotate) current_writer->rotate(up_to_log_index + 1); @@ -1602,7 +1984,7 @@ LogEntriesPtr Changelog::getLogEntriesBetween(uint64_t start, uint64_t end) return entry_storage.getLogEntriesBetween(start, end); } -LogEntryPtr Changelog::entryAt(uint64_t index) +LogEntryPtr Changelog::entryAt(uint64_t index) const { return entry_storage.getEntry(index); } @@ -1655,13 +2037,19 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) buffer.get(buf_local); LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local); - if (i == 0 && entry_storage.contains(cur_index)) + if (i == 0 && cur_index >= min_log_id && cur_index <= max_log_id) writeAt(cur_index, log_entry); else appendEntry(cur_index, log_entry); } } +bool Changelog::isConfLog(uint64_t index) const +{ + return entry_storage.isConfLog(index); + +} + bool Changelog::flush() { if (auto failed_ptr = flushAsync()) @@ -1687,7 +2075,7 @@ std::shared_ptr Changelog::flushAsync() if (!pushed) { - LOG_WARNING(log, "Changelog is shut down"); + LOG_INFO(log, "Changelog is shut down"); return nullptr; } @@ -1697,29 +2085,32 @@ std::shared_ptr Changelog::flushAsync() void Changelog::shutdown() { + LOG_DEBUG(log, "Shutting down Changelog"); if (!log_files_to_delete_queue.isFinished()) log_files_to_delete_queue.finish(); - if (clean_log_thread.joinable()) - clean_log_thread.join(); + if (clean_log_thread->joinable()) + clean_log_thread->join(); if (!write_operations.isFinished()) write_operations.finish(); - if (write_thread.joinable()) - write_thread.join(); + if (write_thread->joinable()) + write_thread->join(); if (!append_completion_queue.isFinished()) append_completion_queue.finish(); - if (append_completion_thread.joinable()) - append_completion_thread.join(); + if (append_completion_thread->joinable()) + append_completion_thread->join(); if (current_writer) { current_writer->finalize(); current_writer.reset(); } + + entry_storage.shutdown(); } Changelog::~Changelog() @@ -1768,4 +2159,22 @@ bool Changelog::isInitialized() const return initialized; } +void Changelog::getKeeperLogInfo(KeeperLogInfo & log_info) const +{ + if (size() > 0) + { + log_info.first_log_idx = getStartIndex(); + auto first_entry = entryAt(log_info.first_log_idx); + chassert(first_entry != nullptr); + log_info.first_log_term = first_entry->get_term(); + + log_info.last_log_idx = max_log_id; + auto last_entry = entryAt(log_info.first_log_idx); + chassert(last_entry != nullptr); + log_info.last_log_term = last_entry->get_term(); + } + + entry_storage.getKeeperLogInfo(log_info); +} + } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 85ff2c48191..e4c3117e6cf 100644 --- a/src/Coordination/Changelog.h +++ 
b/src/Coordination/Changelog.h @@ -1,16 +1,25 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include -#include -#include + +#include +#include + +namespace nuraft +{ + struct log_entry; + struct buffer; + struct raft_server; +} + +namespace Poco +{ + class Logger; +} + +using LoggerPtr = std::shared_ptr; namespace DB { @@ -22,8 +31,11 @@ using LogEntries = std::vector; using LogEntriesPtr = nuraft::ptr; using BufferPtr = nuraft::ptr; -using IndexToLogEntry = std::unordered_map; -using IndexToLogEntryNode = typename IndexToLogEntry::node_type; +struct KeeperLogInfo; +class KeeperContext; +using KeeperContextPtr = std::shared_ptr; +class IDisk; +using DiskPtr = std::shared_ptr; enum class ChangelogVersion : uint8_t { @@ -97,20 +109,38 @@ struct LogLocation size_t size; }; +struct CacheEntry +{ + explicit CacheEntry(LogEntryPtr entry_); + + LogEntryPtr entry = nullptr; + std::atomic is_prefetched = false; + mutable std::mutex entry_mutex; + mutable std::condition_variable entry_prefetched_cv; + std::exception_ptr exception; +}; + +using IndexToCacheEntry = std::unordered_map; +using IndexToCacheEntryNode = typename IndexToCacheEntry::node_type; + + struct LogEntryStorage { - explicit LogEntryStorage(const LogFileSettings & log_settings); + explicit LogEntryStorage(const LogFileSettings & log_settings, KeeperContextPtr keeper_context_); - size_t size() const; + ~LogEntryStorage(); void addEntry(uint64_t index, const LogEntryPtr & log_entry); void addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location); - void eraseIf(std::function index_predicate); + void cleanUpTo(uint64_t index); + void cleanAfter(uint64_t index); bool contains(uint64_t index) const; LogEntryPtr getEntry(uint64_t index) const; void clear(); LogEntryPtr getLatestConfigChange() const; + void cacheFirstLog(uint64_t first_index); + using IndexWithLogLocation = std::pair; void addLogLocations(std::vector indices_with_log_locations); @@ -118,19 +148,40 @@ struct LogEntryStorage void refreshCache(); LogEntriesPtr getLogEntriesBetween(uint64_t start, uint64_t end) const; + + void getKeeperLogInfo(KeeperLogInfo & log_info) const; + + bool isConfLog(uint64_t index) const; + + void shutdown(); private: + void prefetchCommitLogs(); + + void startCommitLogsPrefetch(uint64_t last_committed_index) const; + + bool shouldMoveLogToCommitCache(uint64_t index, size_t log_entry_size); + struct InMemoryCache { explicit InMemoryCache(size_t size_threshold_); - void addEntry(uint64_t index, const LogEntryPtr & log_entry); - void addEntry(IndexToLogEntryNode && node); - IndexToLogEntryNode popOldestEntry(); + void addEntry(uint64_t index, LogEntryPtr log_entry); + void addEntry(IndexToCacheEntryNode && node); + void addPrefetchedEntry(uint64_t index, size_t size); + void setPrefetchedEntry(uint64_t index, LogEntryPtr log_entry, std::exception_ptr exception); + void updateStatsWithNewEntry(uint64_t index, size_t size); + IndexToCacheEntryNode popOldestEntry(); + bool containsEntry(uint64_t index) const; LogEntryPtr getEntry(uint64_t index) const; + void cleanUpTo(uint64_t index); + void cleanAfter(uint64_t index); + bool empty() const; + size_t numberOfEntries() const; bool hasSpaceAvailable(size_t log_entry_size) const; + void clear(); /// Mapping log_id -> log_entry - IndexToLogEntry cache; + IndexToCacheEntry cache; size_t cache_size = 0; size_t min_index_in_cache = 0; size_t max_index_in_cache = 0; @@ -139,14 +190,44 @@ private: }; 
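A rough standalone version of the CacheEntry hand-off declared above: a slot that starts out in a prefetched state and is later filled (or failed) by the background prefetcher, while readers block on a condition variable until it is ready. The PrefetchedSlot name and the std::string payload are made up for the sketch:

#include <condition_variable>
#include <cstdio>
#include <exception>
#include <mutex>
#include <string>
#include <thread>

/// One cache slot: readers block until either a value or an exception has been stored.
struct PrefetchedSlot
{
    std::string get()
    {
        std::unique_lock lock(mutex);
        cv.wait(lock, [&] { return ready; });
        if (exception)
            std::rethrow_exception(exception);
        return value;
    }

    void setValue(std::string value_)
    {
        {
            std::lock_guard lock(mutex);
            value = std::move(value_);
            ready = true;
        }
        cv.notify_all();
    }

    void setException(std::exception_ptr exception_)
    {
        {
            std::lock_guard lock(mutex);
            exception = exception_;
            ready = true;
        }
        cv.notify_all();
    }

    std::mutex mutex;
    std::condition_variable cv;
    bool ready = false;
    std::string value;
    std::exception_ptr exception;
};

int main()
{
    PrefetchedSlot slot;
    /// the background "prefetcher" publishes the entry once it has been read from disk
    std::thread prefetcher([&] { slot.setValue("log entry payload"); });
    std::printf("got: %s\n", slot.get().c_str());  /// blocks until the slot is filled
    prefetcher.join();
}

std::promise and std::future provide a similar single-assignment hand-off; the sketch keeps the explicit mutex plus condition_variable layout so it mirrors the members of CacheEntry.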
InMemoryCache latest_logs_cache; - InMemoryCache commit_logs_cache; + mutable InMemoryCache commit_logs_cache; - size_t total_entries = 0; + LogEntryPtr latest_config; + uint64_t latest_config_index = 0; + + LogEntryPtr first_log_entry; + uint64_t first_log_index = 0; + + std::unique_ptr commit_logs_prefetcher; + + struct FileReadInfo + { + ChangelogFileDescriptionPtr file_description; + size_t position; + size_t count; + }; + + struct PrefetchInfo + { + std::vector file_infos; + std::pair commit_prefetch_index_range; + std::atomic cancel; + std::atomic done = false; + }; + + mutable ConcurrentBoundedQueue> prefetch_queue; + mutable std::shared_ptr current_prefetch_info; mutable std::mutex logs_location_mutex; std::vector unapplied_indices_with_log_locations; std::unordered_map logs_location; size_t max_index_with_location = 0; + + std::unordered_set conf_logs_indices; + + bool is_shutdown = false; + KeeperContextPtr keeper_context; + LoggerPtr log; }; /// Simplest changelog with files rotation. @@ -190,7 +271,7 @@ public: LogEntriesPtr getLogEntriesBetween(uint64_t start_index, uint64_t end_index); /// Return entry at position index - LogEntryPtr entryAt(uint64_t index); + LogEntryPtr entryAt(uint64_t index) const; /// Serialize entries from index into buffer BufferPtr serializeEntriesToBuffer(uint64_t index, int32_t count); @@ -198,6 +279,8 @@ public: /// Apply entries from buffer overriding existing entries void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer); + bool isConfLog(uint64_t index) const; + /// Fsync latest log to disk and flush buffer bool flush(); @@ -205,7 +288,7 @@ public: void shutdown(); - uint64_t size() const { return entry_storage.size(); } + uint64_t size() const { return max_log_id - min_log_id + 1; } uint64_t lastDurableIndex() const { @@ -217,6 +300,8 @@ public: bool isInitialized() const; + void getKeeperLogInfo(KeeperLogInfo & log_info) const; + /// Fsync log to disk ~Changelog(); @@ -255,6 +340,8 @@ private: LogEntryStorage entry_storage; + std::unordered_set conf_logs_indices; + /// Start log_id which exists in all "active" logs /// min_log_id + 1 == max_log_id means empty log storage for NuRaft uint64_t min_log_id = 0; @@ -262,7 +349,7 @@ private: /// For compaction, queue of delete not used logs /// 128 is enough, even if log is not removed, it's not a problem ConcurrentBoundedQueue> log_files_to_delete_queue{128}; - ThreadFromGlobalPool clean_log_thread; + std::unique_ptr clean_log_thread; struct AppendLog { @@ -280,7 +367,7 @@ private: void writeThread(); - ThreadFromGlobalPool write_thread; + std::unique_ptr write_thread; ConcurrentBoundedQueue write_operations; /// Append log completion callback tries to acquire NuRaft's global lock @@ -289,7 +376,7 @@ private: /// For those reasons we call the completion callback in a different thread void appendCompletionThread(); - ThreadFromGlobalPool append_completion_thread; + std::unique_ptr append_completion_thread; ConcurrentBoundedQueue append_completion_queue; // last_durable_index needs to be exposed through const getter so we make mutex mutable diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 2436d730ae4..61ecd40ee7f 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -34,6 +34,11 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco e.addMessage("in Coordination settings config"); throw; } + + /// for backwards compatibility we set 
max_requests_append_size to max_requests_batch_size + /// if max_requests_append_size was not changed + if (!max_requests_append_size.changed) + max_requests_append_size = max_requests_batch_size; } diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 0ef9f21d2f2..cf1dcb6d2b1 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -41,6 +41,7 @@ struct Settings; M(UInt64, max_request_queue_size, 100000, "Maximum number of request that can be in queue for processing", 0) \ M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \ M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \ + M(UInt64, max_requests_append_size, 100, "Max size of batch of requests that can be sent to replica in append request", 0) \ M(UInt64, max_flush_batch_size, 1000, "Max size of batch of requests that can be flushed together", 0) \ M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \ M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \ @@ -53,8 +54,8 @@ struct Settings; M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. 
Settings is disabled by default to not break backwards compatibility.", 0) \ - M(UInt64, latest_logs_cache_size_threshold, 50 * 1024 * 1024, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ - M(UInt64, commit_logs_cache_size_threshold, 10 * 1024 * 1024, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) + M(UInt64, latest_logs_cache_size_threshold, 1 * 1024 * 1024 * 1024, "Maximum total size of in-memory cache of latest log entries.", 0) \ + M(UInt64, commit_logs_cache_size_threshold, 500 * 1024 * 1024, "Maximum total size of in-memory cache of log entries needed next for commit.", 0) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 4862acd448f..18d12cef8e2 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -561,6 +561,12 @@ String LogInfoCommand::run() append("leader_committed_log_idx", log_info.leader_committed_log_idx); append("target_committed_log_idx", log_info.target_committed_log_idx); append("last_snapshot_idx", log_info.last_snapshot_idx); + + append("latest_logs_cache_entries", log_info.latest_logs_cache_entries); + append("latest_logs_cache_size", log_info.latest_logs_cache_size); + + append("commit_logs_cache_entries", log_info.commit_logs_cache_entries); + append("commit_logs_cache_size", log_info.commit_logs_cache_size); return ret.str(); } diff --git a/src/Coordination/Keeper4LWInfo.h b/src/Coordination/Keeper4LWInfo.h index f99be0682ce..80b00b3f36e 100644 --- a/src/Coordination/Keeper4LWInfo.h +++ b/src/Coordination/Keeper4LWInfo.h @@ -52,16 +52,16 @@ struct Keeper4LWInfo struct KeeperLogInfo { /// My first log index in log store. - uint64_t first_log_idx; + uint64_t first_log_idx{0}; /// My first log term. - uint64_t first_log_term; + uint64_t first_log_term{0}; /// My last log index in log store. - uint64_t last_log_idx; + uint64_t last_log_idx{0}; /// My last log term. - uint64_t last_log_term; + uint64_t last_log_term{0}; /// My last committed log index in state machine. uint64_t last_committed_log_idx; @@ -74,6 +74,12 @@ struct KeeperLogInfo /// The largest committed log index in last snapshot. 
uint64_t last_snapshot_idx; + + uint64_t latest_logs_cache_entries; + uint64_t latest_logs_cache_size; + + uint64_t commit_logs_cache_entries; + uint64_t commit_logs_cache_size; }; } diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index ce7c715237e..e9a43ba1eff 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -66,6 +66,12 @@ nuraft::ptr KeeperLogStore::entry_at(uint64_t index) return changelog.entryAt(index); } +bool KeeperLogStore::is_conf(uint64_t index) +{ + std::lock_guard lock(changelog_lock); + return changelog.isConfLog(index); +} + uint64_t KeeperLogStore::term_at(uint64_t index) { std::lock_guard lock(changelog_lock); @@ -145,4 +151,10 @@ void KeeperLogStore::setRaftServer(const nuraft::ptr & raft return changelog.setRaftServer(raft_server); } +void KeeperLogStore::getKeeperLogInfo(KeeperLogInfo & log_info) const +{ + std::lock_guard lock(changelog_lock); + changelog.getKeeperLogInfo(log_info); +} + } diff --git a/src/Coordination/KeeperLogStore.h b/src/Coordination/KeeperLogStore.h index aa277f19d88..21d9479ee47 100644 --- a/src/Coordination/KeeperLogStore.h +++ b/src/Coordination/KeeperLogStore.h @@ -1,10 +1,10 @@ #pragma once #include -#include #include #include #include #include +#include #include namespace DB @@ -38,6 +38,8 @@ public: /// Return entry at index nuraft::ptr entry_at(uint64_t index) override; + bool is_conf(uint64_t index) override; + /// Term if the index uint64_t term_at(uint64_t index) override; @@ -72,6 +74,8 @@ public: void setRaftServer(const nuraft::ptr & raft_server); + void getKeeperLogInfo(KeeperLogInfo & log_info) const; + private: mutable std::mutex changelog_lock; LoggerPtr log; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 722b1303cc8..04126230263 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -1,6 +1,7 @@ #include #include +#include "Coordination/KeeperLogStore.h" #include "config.h" #include @@ -134,7 +135,7 @@ KeeperServer::KeeperServer( snapshots_queue_, coordination_settings, keeper_context, - config.getBool("keeper_server.upload_snapshot_on_exit", true) ? &snapshot_manager_s3 : nullptr, + config.getBool("keeper_server.upload_snapshot_on_exit", false) ? 
&snapshot_manager_s3 : nullptr, commit_callback, checkAndGetSuperdigest(configuration_and_settings_->super_digest)); @@ -332,7 +333,7 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co params.auto_forwarding_req_timeout_ = getValueOrMaxInt32AndLogWarning(coordination_settings->operation_timeout_ms.totalMilliseconds() * 2, "operation_timeout_ms", log); params.max_append_size_ - = getValueOrMaxInt32AndLogWarning(coordination_settings->max_requests_batch_size, "max_requests_batch_size", log); + = getValueOrMaxInt32AndLogWarning(coordination_settings->max_requests_append_size, "max_requests_append_size", log); params.return_method_ = nuraft::raft_params::async_handler; @@ -427,6 +428,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo { state_machine->init(); + keeper_context->setLastCommitIndex(state_machine->last_commit_index()); state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); auto log_store = state_manager->load_log_store(); @@ -1125,14 +1127,12 @@ KeeperLogInfo KeeperServer::getKeeperLogInfo() auto log_store = state_manager->load_log_store(); if (log_store) { - log_info.first_log_idx = log_store->start_index(); - log_info.first_log_term = log_store->term_at(log_info.first_log_idx); + const auto & keeper_log_storage = static_cast(*log_store); + keeper_log_storage.getKeeperLogInfo(log_info); } if (raft_instance) { - log_info.last_log_idx = raft_instance->get_last_log_idx(); - log_info.last_log_term = raft_instance->get_last_log_term(); log_info.last_committed_log_idx = raft_instance->get_committed_log_idx(); log_info.leader_committed_log_idx = raft_instance->get_leader_committed_log_idx(); log_info.target_committed_log_idx = raft_instance->get_target_committed_log_idx(); diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index f53b8031712..cbe89f53526 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -594,7 +594,7 @@ KeeperSnapshotManager::KeeperSnapshotManager( if (!inserted) LOG_WARNING( - getLogger("KeeperSnapshotManager"), + log, "Found another snapshots with last log idx {}, will use snapshot from disk {}", snapshot_up_to, disk->getName()); @@ -603,6 +603,9 @@ KeeperSnapshotManager::KeeperSnapshotManager( for (const auto & [name, path] : incomplete_files) disk->removeFile(path); + if (snapshot_files.empty()) + LOG_TRACE(log, "No snapshots were found on {}", disk->getName()); + read_disks.insert(disk); }; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 8d50f0a76b1..9acd0cb541c 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -11,6 +11,7 @@ #include #include #include +#include "Common/Exception.h" #include #include #include @@ -139,16 +140,18 @@ void assertDigest( const KeeperStorage::Digest & first, const KeeperStorage::Digest & second, const Coordination::ZooKeeperRequest & request, + uint64_t log_idx, bool committing) { if (!KeeperStorage::checkDigest(first, second)) { LOG_FATAL( getLogger("KeeperStateMachine"), - "Digest for nodes is not matching after {} request of type '{}'.\nExpected digest - {}, actual digest - {} (digest " - "{}). 
Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}", + "Digest for nodes is not matching after {} request of type '{}' at log index {}.\nExpected digest - {}, actual digest - {} " + "(digest {}). Keeper will terminate to avoid inconsistencies.\nExtra information about the request:\n{}", committing ? "committing" : "preprocessing", request.getOpNum(), + log_idx, first.value, second.value, first.version, @@ -296,12 +299,12 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__, "Failed to preprocess stored log, aborting to avoid inconsistent state"); + tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to preprocess stored log at index {}, aborting to avoid inconsistent state", request_for_session.log_idx)); std::abort(); } if (keeper_context->digestEnabled() && request_for_session.digest) - assertDigest(*request_for_session.digest, storage->getNodesDigest(false), *request_for_session.request, false); + assertDigest(*request_for_session.digest, storage->getNodesDigest(false), *request_for_session.request, request_for_session.log_idx, false); return true; } @@ -408,48 +411,57 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n } }; - const auto op_num = request_for_session->request->getOpNum(); - if (op_num == Coordination::OpNum::SessionID) + try { - const Coordination::ZooKeeperSessionIDRequest & session_id_request - = dynamic_cast(*request_for_session->request); - int64_t session_id; - std::shared_ptr response = std::make_shared(); - response->internal_id = session_id_request.internal_id; - response->server_id = session_id_request.server_id; - KeeperStorage::ResponseForSession response_for_session; - response_for_session.session_id = -1; - response_for_session.response = response; - - std::lock_guard lock(storage_and_responses_lock); - session_id = storage->getSessionID(session_id_request.session_timeout_ms); - LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); - response->session_id = session_id; - try_push(response_for_session); - } - else - { - if (op_num == Coordination::OpNum::Close) + const auto op_num = request_for_session->request->getOpNum(); + if (op_num == Coordination::OpNum::SessionID) { - std::lock_guard lock(request_cache_mutex); - parsed_request_cache.erase(request_for_session->session_id); + const Coordination::ZooKeeperSessionIDRequest & session_id_request + = dynamic_cast(*request_for_session->request); + int64_t session_id; + std::shared_ptr response = std::make_shared(); + response->internal_id = session_id_request.internal_id; + response->server_id = session_id_request.server_id; + KeeperStorage::ResponseForSession response_for_session; + response_for_session.session_id = -1; + response_for_session.response = response; + + std::lock_guard lock(storage_and_responses_lock); + session_id = storage->getSessionID(session_id_request.session_timeout_ms); + LOG_DEBUG(log, "Session ID response {} with timeout {}", session_id, session_id_request.session_timeout_ms); + response->session_id = session_id; + try_push(response_for_session); + } + else + { + if (op_num == Coordination::OpNum::Close) + { + std::lock_guard lock(request_cache_mutex); + parsed_request_cache.erase(request_for_session->session_id); + } + + std::lock_guard lock(storage_and_responses_lock); + KeeperStorage::ResponsesForSessions responses_for_sessions + = 
storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); + for (auto & response_for_session : responses_for_sessions) + try_push(response_for_session); + + if (keeper_context->digestEnabled() && request_for_session->digest) + assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, request_for_session->log_idx, true); } - std::lock_guard lock(storage_and_responses_lock); - KeeperStorage::ResponsesForSessions responses_for_sessions - = storage->processRequest(request_for_session->request, request_for_session->session_id, request_for_session->zxid); - for (auto & response_for_session : responses_for_sessions) - try_push(response_for_session); + ProfileEvents::increment(ProfileEvents::KeeperCommits); + last_committed_idx = log_idx; - if (keeper_context->digestEnabled() && request_for_session->digest) - assertDigest(*request_for_session->digest, storage->getNodesDigest(true), *request_for_session->request, true); + if (commit_callback) + commit_callback(log_idx, *request_for_session); + } + catch(...) + { + tryLogCurrentException(log, fmt::format("Failed to commit stored log at index {}", log_idx)); + throw; } - ProfileEvents::increment(ProfileEvents::KeeperCommits); - last_committed_idx = log_idx; - - if (commit_callback) - commit_callback(log_idx, *request_for_session); return nullptr; } @@ -497,6 +509,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s) ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys); last_committed_idx = s.get_last_log_idx(); + keeper_context->setLastCommitIndex(s.get_last_log_idx()); return true; } From c09921c147afe7435f15c4e404e40b4b42ea3256 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 31 Jan 2024 23:10:22 +0100 Subject: [PATCH 019/145] Update test --- .../0_stateless/02981_nested_bad_types.sql | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02981_nested_bad_types.sql b/tests/queries/0_stateless/02981_nested_bad_types.sql index 8c0d2308d8f..87bc80693c8 100644 --- a/tests/queries/0_stateless/02981_nested_bad_types.sql +++ b/tests/queries/0_stateless/02981_nested_bad_types.sql @@ -8,7 +8,6 @@ select map('a', 42)::Map(String, LowCardinality(UInt64)); -- {serverError SUSPIC select map('a', map('b', [42]))::Map(String, Map(String, Array(LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} select tuple('a', 42)::Tuple(String, LowCardinality(UInt64)); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, LowCardinality(UInt64)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} -select 42::Variant(String, LowCardinality(UInt64)) settings allow_experimental_variant_type=1; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} create table test (x Array(LowCardinality(UInt64))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} create table test (x Array(Array(LowCardinality(UInt64)))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} @@ -18,7 +17,6 @@ create table test (x Tuple(String, LowCardinality(UInt64))) engine=Memory; -- {s create table test (x Tuple(String, Array(Map(String, LowCardinality(UInt64))))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} - select ['42']::Array(FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} select ['42']::Array(FixedString(1000000)); -- 
{serverError ILLEGAL_COLUMN} select [[['42']]]::Array(Array(Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} @@ -26,8 +24,6 @@ select map('a', '42')::Map(String, FixedString(1000000)); -- {serverError ILLEGA select map('a', map('b', ['42']))::Map(String, Map(String, Array(FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} select tuple('a', '42')::Tuple(String, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} select tuple('a', [map('b', '42')])::Tuple(String, Array(Map(String, FixedString(1000000)))); -- {serverError ILLEGAL_COLUMN} -select '42'::Variant(UInt64, FixedString(1000000)) settings allow_experimental_variant_type=1; -- {serverError ILLEGAL_COLUMN} - create table test (x Array(FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} create table test (x Array(Array(FixedString(1000000)))) engine=Memory; -- {serverError ILLEGAL_COLUMN} @@ -36,8 +32,6 @@ create table test (x Map(String, Map(String, FixedString(1000000)))) engine=Memo create table test (x Tuple(String, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} create table test (x Tuple(String, Array(Map(String, FixedString(1000000))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} - -select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} select [42]::Array(Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} select [[[42]]]::Array(Array(Array(Variant(String, UInt64)))); -- {serverError ILLEGAL_COLUMN} select map('a', 42)::Map(String, Variant(String, UInt64)); -- {serverError ILLEGAL_COLUMN} @@ -52,3 +46,13 @@ create table test (x Map(String, Map(String, Variant(String, UInt64)))) engine=M create table test (x Tuple(String, Variant(String, UInt64))) engine=Memory; -- {serverError ILLEGAL_COLUMN} create table test (x Tuple(String, Array(Map(String, Variant(String, UInt64))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +set allow_experimental_variant_type=1; +select 42::Variant(String, LowCardinality(UInt64)) settings allow_experimental_variant_type=1; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +select tuple('a', [map('b', 42)])::Tuple(String, Array(Map(String, Variant(LowCardinality(UInt64), UInt8)))); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Variant(LowCardinality(UInt64), UInt8)) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +create table test (x Tuple(String, Array(Map(String, Variant(LowCardinality(UInt64), UInt8))))) engine=Memory; -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} + +select '42'::Variant(UInt64, FixedString(1000000)); -- {serverError ILLEGAL_COLUMN} +select tuple('a', [map('b', '42')])::Tuple(String, Array(Map(String, Variant(UInt32, FixedString(1000000))))); -- {serverError ILLEGAL_COLUMN} +create table test (x Variant(UInt64, FixedString(1000000))) engine=Memory; -- {serverError ILLEGAL_COLUMN} +create table test (x Tuple(String, Array(Map(String, FixedString(1000000))))) engine=Memory; -- {serverError ILLEGAL_COLUMN} From b53abf8186ac6e3834dafca2c93057d483c0a56b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Feb 2024 10:04:34 +0000 Subject: [PATCH 020/145] Finalize --- contrib/NuRaft | 2 +- src/Common/ProfileEvents.cpp | 5 ++++ src/Coordination/Changelog.cpp | 12 ++++++++ src/Coordination/Changelog.h | 29 ++++++++++++++++++- src/Coordination/CoordinationSettings.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 29 +++++++++++++++++++ src/Coordination/FourLetterCommand.h | 29 ++++++++++++++----- .../KeeperAsynchronousMetrics.cpp | 
6 ++++ 8 files changed, 103 insertions(+), 11 deletions(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 1278e32bb0d..a44f99fbfb9 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63 +Subproject commit a44f99fbfb9bead06630afb0a4bef2bad48d6e4c diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 8782f895f3f..c797182819d 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -616,6 +616,11 @@ The server successfully detected this situation and will download merged part fr M(InterfacePostgreSQLReceiveBytes, "Number of bytes received through PostgreSQL interfaces") \ \ M(ParallelReplicasUsedCount, "Number of replicas used to execute a query with task-based parallel replicas") \ + \ + M(KeeperLogsEntryReadFromLatestCache, "Number of log entries in Keeper being read from latest logs cache") \ + M(KeeperLogsEntryReadFromCommitCache, "Number of log entries in Keeper being read from commit logs cache") \ + M(KeeperLogsEntryReadFromFile, "Number of log entries in Keeper being read directly from the changelog file") \ + M(KeeperLogsPrefetchedEntries, "Number of log entries in Keeper being prefetched from the changelog file") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 1d7aa62b1d1..fd86dffdd10 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -21,10 +21,18 @@ #include #include #include +#include #include #include #include +namespace ProfileEvents +{ + extern const Event KeeperLogsEntryReadFromLatestCache; + extern const Event KeeperLogsEntryReadFromCommitCache; + extern const Event KeeperLogsEntryReadFromFile; + extern const Event KeeperLogsPrefetchedEntries; +} namespace DB { @@ -691,6 +699,7 @@ void LogEntryStorage::prefetchCommitLogs() auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); file->seek(position, SEEK_SET); LOG_TRACE(log, "Prefetching {} log entries from path {}, from position {}", count, changelog_description->path, position); + ProfileEvents::increment(ProfileEvents::KeeperLogsPrefetchedEntries, count); for (size_t i = 0; i < count; ++i) { @@ -1084,10 +1093,12 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const else if (auto entry_from_latest_cache = latest_logs_cache.getEntry(index)) { entry = std::move(entry_from_latest_cache); + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromLatestCache); } else if (auto entry_from_commit_cache = commit_logs_cache.getEntry(index)) { entry = std::move(entry_from_commit_cache); + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromCommitCache); } else if (auto it = logs_location.find(index); it != logs_location.end()) { @@ -1099,6 +1110,7 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const auto record = readChangelogRecord(*file, changelog_description->path); entry = logEntryFromRecord(record); + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); } return entry; } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index e4c3117e6cf..71507d67833 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -123,7 +123,34 @@ struct CacheEntry using IndexToCacheEntry = std::unordered_map; using IndexToCacheEntryNode = typename IndexToCacheEntry::node_type; - +/** + * Storage for storing and 
handling deserialized entries from disk.
+ * It consists of 2 in-memory caches that rely heavily on the way
+ * entries are used in Raft.
+ * Random and repeated access to certain entries is almost never done so we can't implement a solution
+ * like LRU/SLRU cache because entries would be cached and never read again.
+ * Entries are often read sequentially for 2 cases:
+ * - for replication
+ * - for committing
+ *
+ * First cache will store latest logs in memory, limited by the latest_logs_cache_size_threshold coordination setting.
+ * Once the log is persisted to the disk, we store its location in the file and allow the storage
+ * to evict that log from cache if it's needed.
+ * Latest logs cache should have a high hit rate in "normal" operation for both replication and committing.
+ *
+ * As we commit (and read) logs sequentially, we will try to read from latest logs cache.
+ * In some cases, latest logs could be ahead of the last committed log by more than latest_logs_cache_size_threshold
+ * which means that for each commit we would need to read the log from disk.
+ * In case latest logs cache hits the threshold we have a second cache called commit logs cache limited by commit_logs_cache_size_threshold.
+ * If a log is evicted from the latest logs cache, we check if we can move it to commit logs cache to avoid re-reading the log from disk.
+ * If latest logs cache moves ahead of the commit log by a lot or commit log hits the threshold
+ * we cannot move the entries from latest logs and we will need to refill the commit cache from disk.
+ * To avoid reading entry by entry (which can have really bad effect on performance because we support disks based on S3),
+ * we try to prefetch multiple entries ahead of time because we know that they will be read by commit thread
+ * in the future.
+ * Commit logs cache should have a high hit rate if we start with a lot of unprocessed logs that cannot fit in the
+ * latest logs cache.
+ */ struct LogEntryStorage { explicit LogEntryStorage(const LogFileSettings & log_settings, KeeperContextPtr keeper_context_); diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 61ecd40ee7f..ea1acf02450 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -46,7 +46,7 @@ const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = #if USE_JEMALLOC "jmst,jmfp,jmep,jmdp," #endif -"conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld"; +"conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif,rqld,rclc,clrs,ftfl,ydld,pfev"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 18d12cef8e2..09e99f69fd0 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include "Coordination/KeeperFeatureFlags.h" #include #include @@ -193,6 +194,8 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr jemalloc_disable_profile = std::make_shared(keeper_dispatcher); factory.registerCommand(jemalloc_disable_profile); #endif + FourLetterCommandPtr profile_events_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(profile_events_command); factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); @@ -650,4 +653,30 @@ String JemallocDisableProfile::run() } #endif +String ProfileEventsCommand::run() +{ + StringBuffer ret; + + auto append = [&ret] (const String & metric, uint64_t value, const String & docs) -> void + { + writeText(metric, ret); + writeText('\t', ret); + writeText(std::to_string(value), ret); + writeText('\t', ret); + writeText(docs, ret); + writeText('\n', ret); + }; + + for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) + { + const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); + + std::string metric_name{ProfileEvents::getName(static_cast(i))}; + std::string metric_doc{ProfileEvents::getDocumentation(static_cast(i))}; + append(metric_name, counter, metric_doc); + } + + return ret.str(); +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 7fc044881cf..82b30a0b5f6 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -1,18 +1,19 @@ #pragma once -#include -#include +#include "config.h" + #include - -#include -#include - -#include - +#include +#include namespace DB { +class WriteBufferFromOwnString; +class KeeperDispatcher; + +using String = std::string; + struct IFourLetterCommand; using FourLetterCommandPtr = std::shared_ptr; @@ -479,4 +480,16 @@ struct JemallocDisableProfile : public IFourLetterCommand }; #endif +struct ProfileEventsCommand : public IFourLetterCommand +{ + explicit ProfileEventsCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "pfev"; } + String run() override; + ~ProfileEventsCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 8f6e1dec6c1..242e8608337 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ 
b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -99,6 +99,12 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM new_values["KeeperTargetCommitLogIdx"] = { keeper_log_info.target_committed_log_idx, "Index until which logs can be committed in ClickHouse Keeper." }; new_values["KeeperLastSnapshotIdx"] = { keeper_log_info.last_snapshot_idx, "Index of the last log present in the last created snapshot." }; + new_values["KeeperLatestLogsCacheEntries"] = {keeper_log_info.latest_logs_cache_entries, "Number of entries stored in the in-memory cache for latest logs"}; + new_values["KeeperLatestLogsCacheSize"] = {keeper_log_info.latest_logs_cache_size, "Total size of in-memory cache for latest logs"}; + + new_values["KeeperCommitLogsCacheEntries"] = {keeper_log_info.commit_logs_cache_entries, "Number of entries stored in the in-memory cache for next logs to be committed"}; + new_values["KeeperCommitLogsCacheSize"] = {keeper_log_info.commit_logs_cache_size, "Total size of in-memory cache for next logs to be committed"}; + auto & keeper_connection_stats = keeper_dispatcher.getKeeperConnectionStats(); new_values["KeeperMinLatency"] = { keeper_connection_stats.getMinLatency(), "Minimal request latency of ClickHouse Keeper." }; From 8afa7c4dc80805c3fda4a74376d3786f00c1bcbb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Feb 2024 10:18:07 +0000 Subject: [PATCH 021/145] Randomize cache sizes in tests --- tests/config/config.d/keeper_port.xml | 5 ++++- tests/config/install.sh | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index b724d5dd87e..0487ceed989 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -3,7 +3,7 @@ 9181 1 - 1 + 0 1 @@ -24,6 +24,9 @@ 0 1 + + 31557632 + 20623360 diff --git a/tests/config/install.sh b/tests/config/install.sh index cfe810cda84..9873af2f6cd 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -134,6 +134,12 @@ fi value=$(($RANDOM % 2)) sed --follow-symlinks -i "s|[01]|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml +value=$((($RANDOM + 100) * 2048)) +sed --follow-symlinks -i "s|[[:digit:]]\+|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml + +value=$((($RANDOM + 100) * 2048)) +sed --follow-symlinks -i "s|[[:digit:]]\+|$value|" $DEST_SERVER_PATH/config.d/keeper_port.xml + if [[ -n "$USE_POLYMORPHIC_PARTS" ]] && [[ "$USE_POLYMORPHIC_PARTS" -eq 1 ]]; then ln -sf $SRC_PATH/config.d/polymorphic_parts.xml $DEST_SERVER_PATH/config.d/ fi From c197fb27d6bfe8153dfa38e05db177682bc1b109 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Feb 2024 10:25:20 +0000 Subject: [PATCH 022/145] Better --- src/Coordination/Changelog.cpp | 8 ++++---- src/Coordination/Changelog.h | 6 +++--- src/Coordination/KeeperDispatcher.cpp | 4 +--- src/Coordination/KeeperStateMachine.cpp | 4 +++- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index fd86dffdd10..3b9b0b26d04 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -986,7 +986,7 @@ bool LogEntryStorage::shouldMoveLogToCommitCache(uint64_t index, size_t log_entr /// if commit logs cache is empty, we need it only if it's the next log to commit if (commit_logs_cache.empty()) return keeper_context->lastCommittedIndex() + 1 == index; - + return commit_logs_cache.max_index_in_cache == index - 1 && 
commit_logs_cache.hasSpaceAvailable(log_entry_size); } @@ -996,7 +996,7 @@ void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & l while (!latest_logs_cache.hasSpaceAvailable(entry_size)) { auto entry_handle = latest_logs_cache.popOldestEntry(); - size_t removed_entry_size = logEntrySize(*entry_handle.mapped().entry); + size_t removed_entry_size = logEntrySize(*entry_handle.mapped().entry); if (shouldMoveLogToCommitCache(entry_handle.key(), removed_entry_size)) commit_logs_cache.addEntry(std::move(entry_handle)); } @@ -1257,14 +1257,14 @@ void LogEntryStorage::shutdown() { if (std::exchange(is_shutdown, true)) return; - + if (!prefetch_queue.isFinished()) prefetch_queue.finish(); if (current_prefetch_info) { current_prefetch_info->cancel = true; - current_prefetch_info->done.wait(false); + current_prefetch_info->done.wait(false); } if (commit_logs_prefetcher->joinable()) diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 71507d67833..5fdb1a27840 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -128,7 +128,7 @@ using IndexToCacheEntryNode = typename IndexToCacheEntry::node_type; * It consists of 2 in-memory caches that rely heavily on the way * entries are used in Raft. * Random and repeated access to certain entries is almost never done so we can't implement a solution - * like LRU/SLRU cache because entries would be cached and never read again. + * like LRU/SLRU cache because entries would be cached and never read again. * Entries are often read sequentially for 2 cases: * - for replication * - for committing @@ -143,7 +143,7 @@ using IndexToCacheEntryNode = typename IndexToCacheEntry::node_type; * which means that for each commit we would need to read the log from disk. * In case latest logs cache hits the threshold we have a second cache called commit logs cache limited by commit_logs_cache_size_threshold. * If a log is evicted from the latest logs cache, we check if we can move it to commit logs cache to avoid re-reading the log from disk. - * If latest logs cache moves ahead of the commit log by a lot or commit log hits the threshold + * If latest logs cache moves ahead of the commit log by a lot or commit log hits the threshold * we cannot move the entries from latest logs and we will need to refill the commit cache from disk. 
* To avoid reading entry by entry (which can have really bad effect on performance because we support disks based on S3), * we try to prefetch multiple entries ahead of time because we know that they will be read by commit thread @@ -179,7 +179,7 @@ struct LogEntryStorage void getKeeperLogInfo(KeeperLogInfo & log_info) const; bool isConfLog(uint64_t index) const; - + void shutdown(); private: void prefetchCommitLogs(); diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index ab57ab7337d..1175ee9e95f 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -433,7 +433,7 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf snapshots_queue, keeper_context, snapshot_s3, - [this](uint64_t log_idx, const KeeperStorage::RequestForSession & request_for_session) + [this](uint64_t /*log_idx*/, const KeeperStorage::RequestForSession & request_for_session) { { /// check if we have queue of read requests depending on this request to be committed @@ -457,8 +457,6 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf } } } - - keeper_context->setLastCommitIndex(log_idx); }); try diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 84e86058516..8121a5ac6ce 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -452,11 +452,12 @@ nuraft::ptr KeeperStateMachine::commit(const uint64_t log_idx, n ProfileEvents::increment(ProfileEvents::KeeperCommits); last_committed_idx = log_idx; + keeper_context->setLastCommitIndex(log_idx); if (commit_callback) commit_callback(log_idx, *request_for_session); } - catch(...) + catch (...) { tryLogCurrentException(log, fmt::format("Failed to commit stored log at index {}", log_idx)); throw; @@ -520,6 +521,7 @@ void KeeperStateMachine::commit_config(const uint64_t log_idx, nuraft::ptrserialize(); cluster_config = ClusterConfig::deserialize(*tmp); last_committed_idx = log_idx; + keeper_context->setLastCommitIndex(log_idx); } void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) From 9b646b41d5dbb4a4f5af9be70101410ce1eab4a2 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 1 Feb 2024 13:52:56 +0100 Subject: [PATCH 023/145] Update src/Common/ISlotControl.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: János Benjamin Antal --- src/Common/ISlotControl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ISlotControl.h b/src/Common/ISlotControl.h index aa7414d5465..daeb956f5a8 100644 --- a/src/Common/ISlotControl.h +++ b/src/Common/ISlotControl.h @@ -16,9 +16,9 @@ namespace DB // * free: slot is available to be allocated. // * allocated: slot is allocated to a specific ISlotAllocation. // -// Allocated slots can be considered as: +// Allocated slots can be in one of the following states: // * granted: allocated, but not yet acquired. -// * acquired: acquired using IAcquiredSlot. +// * acquired: a granted slot becomes acquired by using IAcquiredSlot. // // Example for CPU (see ConcurrencyControl.h). Every slot represents one CPU in the system. // Slot allocation is a request to allocate specific number of CPUs for a specific query. 
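The slot lifecycle described in the ISlotControl.h comment above (free -> granted on allocation -> acquired by a worker -> free again on release) can be sketched roughly as follows. This is only an illustrative, self-contained sketch: SlotPool and its methods are hypothetical names and do not correspond to the real ISlotControl / ConcurrencyControl interface.

// Minimal illustrative sketch of the slot state transitions described above.
// Names here (SlotPool, allocate, tryAcquire, release) are hypothetical.
#include <algorithm>
#include <cstddef>
#include <mutex>

class SlotPool
{
public:
    explicit SlotPool(size_t total) : free(total) {}

    // Allocate up to `max` slots for one query; allocated slots start as "granted".
    size_t allocate(size_t max)
    {
        std::lock_guard<std::mutex> lock(mutex);
        size_t taken = std::min(max, free);
        free -= taken;
        granted += taken;
        return taken;
    }

    // A worker thread upgrades one granted slot to "acquired" before doing work.
    bool tryAcquire()
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (granted == 0)
            return false;
        --granted;
        ++acquired;
        return true;
    }

    // Releasing an acquired slot returns it to the free pool.
    void release()
    {
        std::lock_guard<std::mutex> lock(mutex);
        --acquired;
        ++free;
    }

private:
    std::mutex mutex;
    size_t free;
    size_t granted = 0;
    size_t acquired = 0;
};

The real interface is richer (for example, acquired slots are represented by IAcquiredSlot objects, as the header comment notes), but the state transitions are the same ones listed above.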
From d485e36f20ce141c266d4ae4b53735daa2c1c3e7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 1 Feb 2024 16:57:46 +0100 Subject: [PATCH 024/145] Fix style --- src/Interpreters/InterpreterCreateQuery.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a4d93eb623b..5e63d580c8b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -99,7 +99,6 @@ namespace ErrorCodes extern const int DATABASE_ALREADY_EXISTS; extern const int BAD_ARGUMENTS; extern const int BAD_DATABASE_FOR_TEMPORARY_TABLE; - extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY; extern const int ILLEGAL_SYNTAX_FOR_DATA_TYPE; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_INDEX; From 533f7d03148dd9368e93ea28fe61ba2bdd2a837c Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Feb 2024 16:09:00 +0000 Subject: [PATCH 025/145] Build fix --- src/Coordination/Changelog.cpp | 235 +++++++++++++----- src/Coordination/Changelog.h | 18 +- src/Coordination/InMemoryLogStore.cpp | 6 + src/Coordination/InMemoryLogStore.h | 2 + src/Coordination/tests/gtest_coordination.cpp | 7 + 5 files changed, 191 insertions(+), 77 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 3b9b0b26d04..a2b5905f776 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -507,17 +507,17 @@ struct ChangelogReadResult /// Total entries read from log including skipped. /// Useful when we decide to continue to write in the same log and want to know /// how many entries was already written in it. - uint64_t total_entries_read_from_log; + uint64_t total_entries_read_from_log{0}; /// First index in log - uint64_t log_start_index; + uint64_t log_start_index{0}; /// First entry actually read log (not including skipped) - uint64_t first_read_index; + uint64_t first_read_index{0}; /// Last entry read from log (last entry in log) /// When we don't skip anything last_read_index - first_read_index = total_entries_read_from_log. /// But when some entries from the start of log can be skipped because they are not required. 
- uint64_t last_read_index; + uint64_t last_read_index{0}; /// last offset we were able to read from log off_t last_position; @@ -610,7 +610,7 @@ public: /// Check for duplicated changelog ids if (entry_storage.contains(record.header.index)) - entry_storage.cleanAfter(record.header.index + 1); + entry_storage.cleanAfter(record.header.index - 1); result.total_entries_read_from_log += 1; @@ -814,7 +814,9 @@ void LogEntryStorage::InMemoryCache::addEntry(uint64_t index, LogEntryPtr log_en auto entry_size = logEntrySize(*log_entry); auto [_, inserted] = cache.emplace(index, std::move(log_entry)); if (!inserted) + { throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); + } updateStatsWithNewEntry(index, entry_size); } @@ -826,6 +828,7 @@ void LogEntryStorage::InMemoryCache::addEntry(IndexToCacheEntryNode && node) auto result = cache.insert(std::move(node)); if (!result.inserted) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); + updateStatsWithNewEntry(index, entry_size); } @@ -833,7 +836,7 @@ void LogEntryStorage::InMemoryCache::addPrefetchedEntry(uint64_t index, size_t s { auto [_, inserted] = cache.emplace(index, nullptr); if (!inserted) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to set prefetched entry with index {} which is already present in cache", index); updateStatsWithNewEntry(index, size); } @@ -893,52 +896,51 @@ LogEntryPtr LogEntryStorage::InMemoryCache::getEntry(uint64_t index) const void LogEntryStorage::InMemoryCache::cleanUpTo(uint64_t index) { - if (index <= min_index_in_cache) + if (empty() || index <= min_index_in_cache) return; if (index > max_index_in_cache) { cache.clear(); cache_size = 0; + return; } - else - { - for (size_t i = min_index_in_cache; i < index; ++i) - { - auto it = cache.find(i); - if (it == cache.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); - cache_size -= logEntrySize(*it->second.entry); - cache.erase(it); - } - min_index_in_cache = index; + for (size_t i = min_index_in_cache; i < index; ++i) + { + auto it = cache.find(i); + if (it == cache.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); + + cache_size -= logEntrySize(*it->second.entry); + cache.erase(it); } + min_index_in_cache = index; } void LogEntryStorage::InMemoryCache::cleanAfter(uint64_t index) { - if (index >= max_index_in_cache) + if (empty() || index >= max_index_in_cache) return; if (index < min_index_in_cache) { cache.clear(); cache_size = 0; + return; } - else - { - for (size_t i = index + 1; i < max_index_in_cache; ++i) - { - auto it = cache.find(i); - if (it == cache.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); - cache_size -= logEntrySize(*it->second.entry); - cache.erase(it); - } - max_index_in_cache = index; + for (size_t i = index + 1; i <= max_index_in_cache; ++i) + { + auto it = cache.find(i); + if (it == cache.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from cache", i); + + cache_size -= logEntrySize(*it->second.entry); + cache.erase(it); } + + max_index_in_cache = index; } void LogEntryStorage::InMemoryCache::clear() @@ -973,12 +975,6 @@ void 
LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) latest_config_index = index; conf_logs_indices.insert(index); } - - if (first_log_entry == nullptr) - { - first_log_index = index; - first_log_entry = log_entry; - } } bool LogEntryStorage::shouldMoveLogToCommitCache(uint64_t index, size_t log_entry_size) @@ -1000,11 +996,14 @@ void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & l if (shouldMoveLogToCommitCache(entry_handle.key(), removed_entry_size)) commit_logs_cache.addEntry(std::move(entry_handle)); } - latest_logs_cache.addEntry(index, log_entry); logs_location.emplace(index, std::move(log_location)); + if (logs_location.size() == 1) + min_index_with_location = index; + max_index_with_location = index; + if (log_entry->get_val_type() == nuraft::conf) { latest_config = log_entry; @@ -1019,7 +1018,28 @@ void LogEntryStorage::cleanUpTo(uint64_t index) /// uncommitted logs should never be compacted so we don't have to handle /// logs that are currently being prefetched commit_logs_cache.cleanUpTo(index); - std::erase_if(logs_location, [&](const auto & item) { return item.first < index; }); + + if (!logs_location.empty() && index > min_index_with_location) + { + if (index > max_index_with_location) + { + logs_location.clear(); + } + else + { + for (size_t i = min_index_with_location; i < index; ++i) + { + auto it = logs_location.find(i); + if (it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from logs location", i); + + logs_location.erase(it); + } + + min_index_with_location = index; + } + } + std::erase_if(conf_logs_indices, [&](const auto conf_index) { return conf_index < index; }); if (auto it = std::max_element(conf_logs_indices.begin(), conf_logs_indices.end()); it != conf_logs_indices.end()) { @@ -1029,6 +1049,8 @@ void LogEntryStorage::cleanUpTo(uint64_t index) else latest_config = nullptr; + if (first_log_index < index) + first_log_entry = nullptr; } void LogEntryStorage::cleanAfter(uint64_t index) @@ -1037,6 +1059,9 @@ void LogEntryStorage::cleanAfter(uint64_t index) /// if we cleared all latest logs, there is a possibility we would need to clear commit logs if (latest_logs_cache.empty()) { + /// we will clean everything after the index, if there is a prefetch in progress + /// wait until we fetch everything until index + /// afterwards we can stop prefetching of newer logs because they will be cleaned up commit_logs_cache.getEntry(index); if (current_prefetch_info && !current_prefetch_info->done) { @@ -1052,10 +1077,28 @@ void LogEntryStorage::cleanAfter(uint64_t index) startCommitLogsPrefetch(keeper_context->lastCommittedIndex()); } - std::erase_if(logs_location, [&](const auto & item) { return item.first > index; }); - if (!logs_location.empty()) - max_index_with_location = index; - else if (latest_logs_cache.empty()) + if (!logs_location.empty() && index < max_index_with_location) + { + if (index < min_index_with_location) + { + logs_location.clear(); + } + else + { + for (size_t i = index + 1; i <= max_index_with_location; ++i) + { + auto it = logs_location.find(i); + if (it == logs_location.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Log entry with index {} unexpectedly missing from logs location", i); + + logs_location.erase(it); + } + + max_index_with_location = index; + } + } + + if (empty()) /// if we don't store any logs, reset first log cache first_log_entry = nullptr; @@ -1067,6 +1110,9 @@ void LogEntryStorage::cleanAfter(uint64_t index) } 
else latest_config = nullptr; + + if (first_log_index > index) + first_log_entry = nullptr; } bool LogEntryStorage::contains(uint64_t index) const @@ -1110,6 +1156,13 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const auto record = readChangelogRecord(*file, changelog_description->path); entry = logEntryFromRecord(record); + + if (first_log_entry == nullptr && index == getFirstIndex()) + { + first_log_index = index; + first_log_entry = entry; + } + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); } return entry; @@ -1158,6 +1211,9 @@ void LogEntryStorage::refreshCache() for (auto & [index, log_location] : new_unapplied_indices_with_log_locations) { + if (logs_location.empty()) + min_index_with_location = index; + logs_location.emplace(index, std::move(log_location)); max_index_with_location = index; } @@ -1202,6 +1258,7 @@ LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end { auto record = readChangelogRecord(*file, file_description->path); ret->push_back(logEntryFromRecord(record)); + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); } read_info.reset(); @@ -1253,6 +1310,44 @@ bool LogEntryStorage::isConfLog(uint64_t index) const return conf_logs_indices.contains(index); } +size_t LogEntryStorage::empty() const +{ + return logs_location.empty() && latest_logs_cache.empty(); +} + +size_t LogEntryStorage::size() const +{ + if (empty()) + return 0; + + size_t min_index = 0; + size_t max_index = 0; + + if (!logs_location.empty()) + { + min_index = min_index_with_location; + max_index = max_index_with_location; + } + else + min_index = latest_logs_cache.min_index_in_cache; + + if (!latest_logs_cache.empty()) + max_index = latest_logs_cache.max_index_in_cache; + + return max_index - min_index + 1; +} + +size_t LogEntryStorage::getFirstIndex() const +{ + if (!logs_location.empty()) + return min_index_with_location; + + if (!latest_logs_cache.empty()) + return latest_logs_cache.min_index_in_cache; + + return 0; +} + void LogEntryStorage::shutdown() { if (std::exchange(is_shutdown, true)) @@ -1399,6 +1494,7 @@ Changelog::Changelog( } void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uint64_t logs_to_keep) +try { std::lock_guard writer_lock(writer_mutex); std::optional last_log_read_result; @@ -1440,7 +1536,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin changelog_description.from_log_index); /// Nothing to do with our more fresh log, leader will overwrite them, so remove everything and just start from last_commited_index removeAllLogs(); - min_log_id = last_commited_log_index; max_log_id = last_commited_log_index == 0 ? 
0 : last_commited_log_index - 1; current_writer->rotate(max_log_id + 1); initialized = true; @@ -1480,10 +1575,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin last_log_read_result->log_start_index = changelog_description.from_log_index; - /// Otherwise we have already initialized it - if (min_log_id == 0) - min_log_id = last_log_read_result->first_read_index; - if (last_log_read_result->last_read_index != 0) max_log_id = last_log_read_result->last_read_index; @@ -1506,12 +1597,10 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin }; /// we can have empty log (with zero entries) and last_log_read_result will be initialized - if (!last_log_read_result || min_log_id == 0) /// We just may have no logs (only snapshot or nothing) + if (!last_log_read_result || entry_storage.empty()) /// We just may have no logs (only snapshot or nothing) { /// Just to be sure they don't exist removeAllLogs(); - - min_log_id = last_commited_log_index; max_log_id = last_commited_log_index == 0 ? 0 : last_commited_log_index - 1; } else if (last_commited_log_index != 0 && max_log_id < last_commited_log_index - 1) /// If we have more fresh snapshot than our logs @@ -1523,7 +1612,6 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin last_commited_log_index - 1); removeAllLogs(); - min_log_id = last_commited_log_index; max_log_id = last_commited_log_index - 1; } else if (last_log_is_not_complete) /// if it's complete just start new one @@ -1554,9 +1642,9 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin } else if (last_log_read_result->error) { - LOG_INFO(log, "Chagelog {} read finished with error but some logs were read from it, file will not be removed", description->path); + LOG_INFO(log, "Changelog {} read finished with error but some logs were read from it, file will not be removed", description->path); remove_invalid_logs(); - entry_storage.cleanAfter(last_log_read_result->log_start_index); + entry_storage.cleanAfter(last_log_read_result->last_read_index); move_from_latest_logs_disks(existing_changelogs.at(last_log_read_result->log_start_index)); } /// don't mix compressed and uncompressed writes @@ -1591,10 +1679,13 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin moveFileBetweenDisks(description->disk, description, disk, description->path); } - if (size() != 0) - entry_storage.cacheFirstLog(min_log_id); initialized = true; } +catch (...) 
+{ + tryLogCurrentException(__PRETTY_FUNCTION__); + +} void Changelog::initWriter(ChangelogFileDescriptionPtr description) @@ -1735,7 +1826,7 @@ void Changelog::appendCompletionThread() if (auto raft_server_locked = raft_server.lock()) raft_server_locked->notify_log_append_completion(append_ok); else - LOG_WARNING(log, "Raft server is not set in LogStore."); + LOG_INFO(log, "Raft server is not set in LogStore."); } } @@ -1848,12 +1939,6 @@ void Changelog::appendEntry(uint64_t index, const LogEntryPtr & log_entry) throw Exception(ErrorCodes::LOGICAL_ERROR, "Changelog must be initialized before appending records"); entry_storage.addEntry(index, log_entry); - if (min_log_id == 0) - { - min_log_id = index; - entry_storage.cacheFirstLog(index); - } - max_log_id = index; if (!write_operations.push(AppendLog{index, log_entry})) @@ -1967,16 +2052,23 @@ void Changelog::compact(uint64_t up_to_log_index) else /// Files are ordered, so all subsequent should exist break; } - /// Compaction from the past is possible, so don't make our min_log_id smaller. - min_log_id = std::max(min_log_id, up_to_log_index + 1); - entry_storage.cacheFirstLog(min_log_id); entry_storage.cleanUpTo(up_to_log_index + 1); if (need_rotate) current_writer->rotate(up_to_log_index + 1); - LOG_INFO(log, "Compaction up to {} finished new min index {}, new max index {}", up_to_log_index, min_log_id, max_log_id); + LOG_INFO(log, "Compaction up to {} finished new min index {}, new max index {}", up_to_log_index, getStartIndex(), max_log_id); +} + +uint64_t Changelog::getNextEntryIndex() const +{ + return max_log_id + 1; +} + +uint64_t Changelog::getStartIndex() const +{ + return entry_storage.empty() ? max_log_id + 1 : entry_storage.getFirstIndex(); } LogEntryPtr Changelog::getLastEntry() const @@ -2049,7 +2141,7 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) buffer.get(buf_local); LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local); - if (i == 0 && cur_index >= min_log_id && cur_index <= max_log_id) + if (i == 0 && cur_index >= entry_storage.getFirstIndex() && cur_index <= max_log_id) writeAt(cur_index, log_entry); else appendEntry(cur_index, log_entry); @@ -2095,6 +2187,11 @@ std::shared_ptr Changelog::flushAsync() return failed; } +uint64_t Changelog::size() const +{ + return entry_storage.size(); +} + void Changelog::shutdown() { LOG_DEBUG(log, "Shutting down Changelog"); @@ -2173,7 +2270,7 @@ bool Changelog::isInitialized() const void Changelog::getKeeperLogInfo(KeeperLogInfo & log_info) const { - if (size() > 0) + if (!entry_storage.empty()) { log_info.first_log_idx = getStartIndex(); auto first_entry = entryAt(log_info.first_log_idx); @@ -2181,7 +2278,7 @@ void Changelog::getKeeperLogInfo(KeeperLogInfo & log_info) const log_info.first_log_term = first_entry->get_term(); log_info.last_log_idx = max_log_id; - auto last_entry = entryAt(log_info.first_log_idx); + auto last_entry = entryAt(log_info.last_log_idx); chassert(last_entry != nullptr); log_info.last_log_term = last_entry->get_term(); } diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index 5fdb1a27840..d7152f350f7 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -180,6 +180,10 @@ struct LogEntryStorage bool isConfLog(uint64_t index) const; + size_t empty() const; + size_t size() const; + size_t getFirstIndex() const; + void shutdown(); private: void prefetchCommitLogs(); @@ -222,8 +226,8 @@ private: LogEntryPtr latest_config; uint64_t latest_config_index = 0; - 
LogEntryPtr first_log_entry; - uint64_t first_log_index = 0; + mutable LogEntryPtr first_log_entry; + mutable uint64_t first_log_index = 0; std::unique_ptr commit_logs_prefetcher; @@ -249,6 +253,7 @@ private: std::vector unapplied_indices_with_log_locations; std::unordered_map logs_location; size_t max_index_with_location = 0; + size_t min_index_with_location = 0; std::unordered_set conf_logs_indices; @@ -284,9 +289,9 @@ public: /// Remove log files with to_log_index <= up_to_log_index. void compact(uint64_t up_to_log_index); - uint64_t getNextEntryIndex() const { return max_log_id + 1; } + uint64_t getNextEntryIndex() const; - uint64_t getStartIndex() const { return min_log_id; } + uint64_t getStartIndex() const; /// Last entry in log, or fake entry with term 0 if log is empty LogEntryPtr getLastEntry() const; @@ -315,7 +320,7 @@ public: void shutdown(); - uint64_t size() const { return max_log_id - min_log_id + 1; } + uint64_t size() const; uint64_t lastDurableIndex() const { @@ -369,9 +374,6 @@ private: std::unordered_set conf_logs_indices; - /// Start log_id which exists in all "active" logs - /// min_log_id + 1 == max_log_id means empty log storage for NuRaft - uint64_t min_log_id = 0; uint64_t max_log_id = 0; /// For compaction, queue of delete not used logs /// 128 is enough, even if log is not removed, it's not a problem diff --git a/src/Coordination/InMemoryLogStore.cpp b/src/Coordination/InMemoryLogStore.cpp index ca240584a54..ee93c02b4b0 100644 --- a/src/Coordination/InMemoryLogStore.cpp +++ b/src/Coordination/InMemoryLogStore.cpp @@ -191,4 +191,10 @@ bool InMemoryLogStore::compact(uint64_t last_log_index) return true; } +bool InMemoryLogStore::is_conf(uint64_t index) +{ + auto entry = entry_at(index); + return entry != nullptr && entry->get_val_type() == nuraft::conf; +} + } diff --git a/src/Coordination/InMemoryLogStore.h b/src/Coordination/InMemoryLogStore.h index fc56826c81b..82c676639d5 100644 --- a/src/Coordination/InMemoryLogStore.h +++ b/src/Coordination/InMemoryLogStore.h @@ -39,6 +39,8 @@ public: bool flush() override { return true; } + bool is_conf(uint64_t index) override; + private: std::map> logs TSA_GUARDED_BY(logs_lock); mutable std::mutex logs_lock; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 59a550177a4..5fcf5f85719 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -558,6 +558,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_EQ(changelog.size(), 3); + keeper_context->setLastCommitIndex(2); changelog.compact(2); EXPECT_EQ(changelog.size(), 1); @@ -582,6 +583,7 @@ TEST_P(CoordinationTest, ChangelogTestCompaction) EXPECT_TRUE(fs::exists("./logs/changelog_1_5.bin" + params.extension)); EXPECT_TRUE(fs::exists("./logs/changelog_6_10.bin" + params.extension)); + keeper_context->setLastCommitIndex(6); changelog.compact(6); std::this_thread::sleep_for(std::chrono::microseconds(1000)); @@ -1812,7 +1814,10 @@ void testLogAndStateMachine( snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); } if (snapshot_created && changelog.size() > settings->reserved_log_items) + { + keeper_context->setLastCommitIndex(i - settings->reserved_log_items); changelog.compact(i - settings->reserved_log_items); + } } SnapshotsQueue snapshots_queue1{1}; @@ -2132,6 +2137,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_2); + keeper_context->setLastCommitIndex(105); changelog_2.compact(105); 
std::this_thread::sleep_for(std::chrono::microseconds(1000)); @@ -2157,6 +2163,7 @@ TEST_P(CoordinationTest, TestRotateIntervalChanges) waitDurableLogs(changelog_3); + keeper_context->setLastCommitIndex(125); changelog_3.compact(125); std::this_thread::sleep_for(std::chrono::microseconds(1000)); assertFileDeleted("./logs/changelog_101_110.bin" + params.extension); From d5eec2d85b616a13fe5123ab4cdc7f0d3471e425 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 1 Feb 2024 16:27:57 +0000 Subject: [PATCH 026/145] trying to fix (casting the result to int) --- base/base/Decimal_fwd.h | 6 + src/Functions/FunctionBinaryArithmetic.h | 137 ++++++++---------- .../02975_intdiv_with_decimal.reference | 20 +-- .../0_stateless/02975_intdiv_with_decimal.sql | 8 +- 4 files changed, 84 insertions(+), 87 deletions(-) diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h index 589d6224917..beb228cea3c 100644 --- a/base/base/Decimal_fwd.h +++ b/base/base/Decimal_fwd.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace wide { @@ -44,3 +45,8 @@ concept is_over_big_int = || std::is_same_v || std::is_same_v; } + +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; +template <> struct is_signed { static constexpr bool value = true; }; diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index e31183573c3..9b0afee5053 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -152,22 +152,7 @@ public: /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). using ResultDataType = Switch< /// Result must be Integer - Case< - only_integer && (IsDataTypeDecimal || IsDataTypeDecimal), - Switch< - Case< - IsDataTypeDecimal, - Switch< - Case, DataTypeInt256>, - Case, DataTypeInt128>, - Case, DataTypeInt64>, - Case, DataTypeInt32>>>, - Case< - IsDataTypeDecimal, - Switch< - Case, LeftDataType>, - Case, DataTypeInt64>, - Case, DataTypeInt32>>>>>, + Case::div_int || IsOperation::div_int_or_zero, DataTypeFromFieldType>, /// Decimal cases Case || IsDataTypeDecimal), InvalidType>, @@ -1687,16 +1672,7 @@ public: if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { if constexpr (is_div_int || is_div_int_or_zero) - { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else - type_res = std::make_shared(); - } + type_res = std::make_shared(); else { if constexpr (is_division) @@ -1721,54 +1697,22 @@ public: else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || (IsDataTypeDecimal && IsFloatingPoint))) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else - type_res = std::make_shared(); - } - else if constexpr (is_div_int || is_div_int_or_zero) - { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else - type_res = std::make_shared(); - } + if constexpr (is_div_int || is_div_int_or_zero) + type_res = std::make_shared(); else type_res = std::make_shared(); } else if constexpr (IsDataTypeDecimal) { if constexpr 
(is_div_int || is_div_int_or_zero) - { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else if constexpr (std::is_same_v) - type_res = std::make_shared(); - else - type_res = std::make_shared(); - } + type_res = std::make_shared(); else type_res = std::make_shared(left.getPrecision(), left.getScale()); } else if constexpr (IsDataTypeDecimal) { - if constexpr ((is_div_int || is_div_int_or_zero) && IsIntegralOrExtended) - type_res = std::make_shared(); - else if constexpr (is_div_int || is_div_int_or_zero) - { - if constexpr (std::is_same_v) - type_res = std::make_shared(); - else - type_res = std::make_shared(); - } + if constexpr (is_div_int || is_div_int_or_zero) + type_res = std::make_shared(); else type_res = std::make_shared(right.getPrecision(), right.getScale()); } @@ -2089,10 +2033,8 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A constexpr bool decimal_with_float = (IsDataTypeDecimal && IsFloatingPoint) || (IsFloatingPoint && IsDataTypeDecimal); - constexpr bool is_div_int_with_decimal = (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal); - - using T0 = std::conditional_t>; - using T1 = std::conditional_t>; + using T0 = std::conditional_t; + using T1 = std::conditional_t; using ResultType = typename ResultDataType::FieldType; using ColVecT0 = ColumnVectorOrDecimal; using ColVecT1 = ColumnVectorOrDecimal; @@ -2108,12 +2050,6 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A left_col = castColumn(arguments[0], converted_type); right_col = castColumn(arguments[1], converted_type); } - else if constexpr (is_div_int_with_decimal) - { - const auto converted_type = std::make_shared(); - left_col = castColumn(arguments[0], converted_type); - right_col = castColumn(arguments[1], converted_type); - } else { left_col = arguments[0].column; @@ -2139,6 +2075,61 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A col_left_size, right_nullmap); } + else if constexpr (!decimal_with_float && (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal)) + { + using DecimalResultType = Switch< + Case< + IsDataTypeDecimal && IsDataTypeDecimal && UseLeftDecimal, + LeftDataType>, + Case && IsDataTypeDecimal, RightDataType>, + Case && IsIntegralOrExtended, LeftDataType>, + Case && IsIntegralOrExtended, RightDataType>, + + /// Decimal Real is not supported (traditional DBs convert Decimal Real to Real) + Case && !IsIntegralOrExtendedOrDecimal, InvalidType>, + Case && !IsIntegralOrExtendedOrDecimal, InvalidType>>; /// Determine result decimal type as it would be with usual division (as we determine BinaryOperationTraits::ResultType) + + if constexpr (!std::is_same_v) + { + DataTypePtr type_res; + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + { + if constexpr (is_division) + { + if (context->getSettingsRef().decimal_check_overflow) + { + /// Check overflow by using operands scale (based on big decimal division implementation details): + /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers + /// i.e. 
int_operand = decimal_operand*10^scale + /// For division, left operand will be scaled by right operand scale also to do big integer division, + /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale + /// So, we can check upfront possible overflow just by checking max scale used for left operand + /// Note: it doesn't detect all possible overflow during big decimal division + if (left.getScale() + right.getScale() > DecimalResultType::maxPrecision()) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); + } + } + DecimalResultType result_type = decimalResultType(left, right); + type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); + } + else if constexpr (IsDataTypeDecimal) + type_res = std::make_shared(left.getPrecision(), left.getScale()); + else + type_res = std::make_shared(right.getPrecision(), right.getScale()); + // Create result decimal type somehow, maybe similar to how we do it in getReturnTypeImplStatic + + auto res = executeNumericWithDecimal( + left, right, + col_left_const, col_right_const, + col_left, col_right, + col_left_size, + right_nullmap); + + auto col = ColumnWithTypeAndName(res, type_res, name); + return castColumn(col, std::make_shared()); + } + return nullptr; + } else // can't avoid else and another indentation level, otherwise the compiler would try to instantiate // ColVecResult for Decimals which would lead to a compile error. { diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference index 594dcee975a..5540734ae4c 100644 --- a/tests/queries/0_stateless/02975_intdiv_with_decimal.reference +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.reference @@ -1,19 +1,19 @@ 2 2 +1 +2 +2 2 2 2 2 2 2 +1 2 +1 2 -2 -2 -2 -2 -2 -2 +1 2 2 2 @@ -34,6 +34,7 @@ 2 2 2 +1 2 2 2 @@ -42,12 +43,11 @@ 2 2 2 +1 2 +1 2 -2 -2 -2 -2 +1 2 2 2 diff --git a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql index 18e657caa8a..0911a481251 100644 --- a/tests/queries/0_stateless/02975_intdiv_with_decimal.sql +++ b/tests/queries/0_stateless/02975_intdiv_with_decimal.sql @@ -13,9 +13,9 @@ SELECT intDiv(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); SELECT intDiv(4, toDecimal64(2.2, 2)); SELECT intDiv(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); SELECT intDiv(4, toDecimal128(2.2, 3)); -SELECT intDiv(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal128(2.2, 2)); SELECT intDiv(4, toDecimal256(2.2, 4)); -SELECT intDiv(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDiv(toDecimal32(4.4, 2), toDecimal256(2.2, 2)); SELECT intDiv(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); SELECT intDiv(toDecimal128(4.4, 2), toDecimal64(2.2, 2)); SELECT intDiv(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); @@ -48,9 +48,9 @@ SELECT intDivOrZero(toDecimal256(4.4, 5), toDecimal32(2.2, 2)); SELECT intDivOrZero(4, toDecimal64(2.2, 2)); SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal64(2.2, 2)); SELECT intDivOrZero(4, toDecimal128(2.2, 3)); -SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal128(2.2, 3)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal128(2.2, 2)); SELECT intDivOrZero(4, toDecimal256(2.2, 4)); -SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal256(2.2, 4)); +SELECT intDivOrZero(toDecimal32(4.4, 2), toDecimal256(2.2, 2)); SELECT intDivOrZero(toDecimal64(4.4, 2), toDecimal64(2.2, 2)); SELECT intDivOrZero(toDecimal128(4.4, 2), 
toDecimal64(2.2, 2)); SELECT intDivOrZero(toDecimal256(4.4, 2), toDecimal64(2.2, 2)); From b1b564a83589ed8282bd5307908e32955fa6a682 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 1 Feb 2024 16:42:11 +0000 Subject: [PATCH 027/145] Fix build again --- src/Coordination/Changelog.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index d7152f350f7..e61bcc5f163 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -4,6 +4,8 @@ #include #include +#include + #include #include @@ -19,7 +21,7 @@ namespace Poco class Logger; } -using LoggerPtr = std::shared_ptr; +using LoggerPtr = boost::intrusive_ptr; namespace DB { From ff21aa9a19a9a2ebd9e16aa32ea1a10d4e988abe Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 1 Feb 2024 19:47:05 +0000 Subject: [PATCH 028/145] Don't infer floats in exponential notation by default --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 16 +++++-- src/IO/readFloatText.cpp | 3 ++ src/IO/readFloatText.h | 48 +++++++++++-------- ...02982_dont_infer_exponent_floats.reference | 2 + .../02982_dont_infer_exponent_floats.sql | 3 ++ 9 files changed, 51 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference create mode 100644 tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4bb48cb3a29..a892c3bb58e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1008,6 +1008,7 @@ class IColumn; M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats", 0) \ M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. 
If turned off, default and null values are not serialized", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ad04ee79995..a70daf8e1c7 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -84,6 +84,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.2", {{"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}}}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 62cbadec4f4..78378168d02 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -226,6 +226,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.try_infer_exponent_floats = settings.input_format_try_infer_exponent_floats; format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 30e4dd04513..ba7cd6055a7 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,6 +46,7 @@ struct FormatSettings bool try_infer_integers = false; bool try_infer_dates = false; bool try_infer_datetimes = false; + bool try_infer_exponent_floats = false; enum class DateTimeInputFormat { diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 2cfcff75edd..06b52e7a7a2 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -865,6 +866,13 @@ namespace return std::make_shared(nested_types); } + bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) + { + if (settings.try_infer_exponent_floats) + return tryReadFloatText(value, buf); + return tryReadFloatTextNoExponent(value, buf); + } + DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings) { if (buf.eof()) @@ -903,7 +911,7 @@ namespace buf.position() = number_start; } - if (tryReadFloatText(tmp_float, buf)) + if (tryReadFloat(tmp_float, buf, settings)) { if (read_int && buf.position() == int_end) return std::make_shared(); @@ -937,7 +945,7 @@ namespace peekable_buf.rollbackToCheckpoint(true); } - if (tryReadFloatText(tmp_float, peekable_buf)) + if (tryReadFloat(tmp_float, peekable_buf, settings)) { /// Float parsing reads no fewer bytes than integer parsing, /// so position of the buffer is either the same, or further. 
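                /// For example, given a hypothetical input "123.45" (value not taken from the source),
                /// integer parsing would stop after "123" while float parsing consumes all six characters,
                /// so comparing the two end positions reveals whether a fractional part followed the integer.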
@@ -949,7 +957,7 @@ namespace return std::make_shared(); } } - else if (tryReadFloatText(tmp_float, buf)) + else if (tryReadFloat(tmp_float, buf, settings)) { return std::make_shared(); } @@ -1390,7 +1398,7 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting buf.position() = buf.buffer().begin(); Float64 tmp; - if (tryReadFloatText(tmp, buf) && buf.eof()) + if (tryReadFloat(tmp, buf, settings) && buf.eof()) return std::make_shared(); return nullptr; diff --git a/src/IO/readFloatText.cpp b/src/IO/readFloatText.cpp index d1143f7c62c..17ccc1b25b7 100644 --- a/src/IO/readFloatText.cpp +++ b/src/IO/readFloatText.cpp @@ -67,4 +67,7 @@ template void readFloatText(Float64 &, ReadBuffer &); template bool tryReadFloatText(Float32 &, ReadBuffer &); template bool tryReadFloatText(Float64 &, ReadBuffer &); +template bool tryReadFloatTextNoExponent(Float32 &, ReadBuffer &); +template bool tryReadFloatTextNoExponent(Float64 &, ReadBuffer &); + } diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 23e904f305a..51964636389 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -324,7 +324,7 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf) } -template +template ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) { static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextImpl must be float or double"); @@ -395,30 +395,33 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) after_point_exponent = (read_digits > significant_digits ? -significant_digits : static_cast(-read_digits)) - after_point_num_leading_zeros; } - if (checkChar('e', in) || checkChar('E', in)) + if constexpr (allow_exponent) { - if (in.eof()) + if (checkChar('e', in) || checkChar('E', in)) { - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after exponent"); - else - return false; - } + if (in.eof()) + { + if constexpr (throw_exception) + throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after exponent"); + else + return false; + } - bool exponent_negative = false; - if (*in.position() == '-') - { - exponent_negative = true; - ++in.position(); - } - else if (*in.position() == '+') - { - ++in.position(); - } + bool exponent_negative = false; + if (*in.position() == '-') + { + exponent_negative = true; + ++in.position(); + } + else if (*in.position() == '+') + { + ++in.position(); + } - readUIntTextUpToNSignificantDigits<4>(exponent, in); - if (exponent_negative) - exponent = -exponent; + readUIntTextUpToNSignificantDigits<4>(exponent, in); + if (exponent_negative) + exponent = -exponent; + } } if (after_point) @@ -604,4 +607,7 @@ template bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { retu template void readFloatText(T & x, ReadBuffer & in) { readFloatTextFast(x, in); } template bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); } +/// Don't read exponent part of the number. 
+template bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl(x, in); } + } diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference new file mode 100644 index 00000000000..b6d1ff865e5 --- /dev/null +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.reference @@ -0,0 +1,2 @@ +c1 Nullable(String) +c1 Nullable(Float64) diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql new file mode 100644 index 00000000000..17f62557fc2 --- /dev/null +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql @@ -0,0 +1,3 @@ +DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 0; +DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 1; + From eba094e228cdc53e4cb9eea35a8860d6f7ba2fac Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 2 Feb 2024 16:48:30 +0800 Subject: [PATCH 029/145] optimize sum decimal and bitint conditionally --- src/AggregateFunctions/AggregateFunctionSum.h | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 5781ab69c6b..b3ba7cc7f57 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -146,9 +146,7 @@ struct AggregateFunctionSumData size_t count = end - start; const auto * end_ptr = ptr + count; - if constexpr ( - (is_integer && !is_big_int_v) - || (is_decimal && !std::is_same_v && !std::is_same_v)) + if constexpr ((is_integer || is_decimal)&&!is_over_big_int) { /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null) /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I @@ -163,8 +161,38 @@ struct AggregateFunctionSumData Impl::add(sum, local_sum); return; } + else if constexpr (is_integer || is_decimal) + { + /// Use a mask to discard the value if it is null + T local_sum{}; + using MaskType = std::conditional_t; + alignas(64) const MaskType masks[2] = {0, -1}; + while (ptr < end_ptr) + { + Value v = *ptr; + if constexpr (!add_if_zero) + { + if constexpr (is_integer) + v &= masks[*condition_map]; + else + v.value &= masks[*condition_map]; + } + else + { + if constexpr (is_integer) + v &= masks[!*condition_map]; + else + v.value &= masks[!*condition_map]; + } - if constexpr (std::is_floating_point_v) + Impl::add(local_sum, v); + ++ptr; + ++condition_map; + } + Impl::add(sum, local_sum); + return; + } + else if constexpr (std::is_floating_point_v) { /// For floating point we use a similar trick as above, except that now we reinterpret the floating point number as an unsigned /// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep) From dd484fc31201e411dd63b1a3dc9d7927367d144c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 31 Jan 2024 22:12:33 +0000 Subject: [PATCH 030/145] Simplify what happens in SELECT all, ... [...] ORDER BY ALL Previously setting `enable_order_by_all` distinguished for ORDER BY ALL whether we should sort by column 'all' (if given in the SELECT clause) or by all columns. The actual behavior was not always intuitive. Now, we throw unconditionally an exception which also simplifies the handling a bit. 
Only an edge case is affected and if users really want to run ORDER BY ALL on a column names 'all', they can alias it. --- docs/en/operations/settings/settings.md | 37 +------------------ .../statements/select/order-by.md | 4 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 8 ++-- src/Core/Settings.h | 2 +- src/Interpreters/TreeRewriter.cpp | 21 +++++++---- .../0_stateless/02943_order_by_all.reference | 30 ++++++--------- .../0_stateless/02943_order_by_all.sql | 30 +++++++-------- 7 files changed, 49 insertions(+), 83 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c673464b23d..4e73afa3ed9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4192,41 +4192,6 @@ Result: └─────┴─────┴───────┘ ``` -## enable_order_by_all {#enable-order-by-all} - -Enables or disables sorting by `ALL` columns, i.e. [ORDER BY](../../sql-reference/statements/select/order-by.md) - -Possible values: - -- 0 — Disable ORDER BY ALL. -- 1 — Enable ORDER BY ALL. - -Default value: `1`. - -**Example** - -Query: - -```sql -CREATE TABLE TAB(C1 Int, C2 Int, ALL Int) ENGINE=Memory(); - -INSERT INTO TAB VALUES (10, 20, 30), (20, 20, 10), (30, 10, 20); - -SELECT * FROM TAB ORDER BY ALL; -- returns an error that ALL is ambiguous - -SELECT * FROM TAB ORDER BY ALL SETTINGS enable_order_by_all; -``` - -Result: - -```text -┌─C1─┬─C2─┬─ALL─┐ -│ 20 │ 20 │ 10 │ -│ 30 │ 10 │ 20 │ -│ 10 │ 20 │ 30 │ -└────┴────┴─────┘ -``` - ## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string} Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array. @@ -5321,4 +5286,4 @@ Allow to ignore schema evolution in Iceberg table engine and read all data using Enabling this setting can lead to incorrect result as in case of evolved schema all data files will be read using the same schema. ::: -Default value: 'false'. \ No newline at end of file +Default value: 'false'. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index d6432a7b4f8..bea5dcab461 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -9,10 +9,10 @@ The `ORDER BY` clause contains - a list of expressions, e.g. `ORDER BY visits, search_phrase`, - a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or -- `ALL` which means all columns of the `SELECT` clause, e.g. `ORDER BY ALL`. +- `ALL` (without other expressions or numbers) which means all columns of the `SELECT` clause: `ORDER BY ALL`. To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. -To disable sorting by `ALL`, set setting [enable_order_by_all](../../../operations/settings/settings.md#enable-order-by-all) = 0. +`ORDER BY ALL` cannot be used when the `SELECT` clause contains identifiers or aliases named `all` (case-insensitively). The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. Unless an explicit sort order is specified, `ASC` is used by default. 
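To make the documented restriction concrete, a small hypothetical session against the `order_by_all` test table used further down in this patch (illustrative only; the error code is the one the tests assert):

SELECT a, b FROM order_by_all ORDER BY ALL;        -- expands to ORDER BY a, b
SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- rejected with UNEXPECTED_EXPRESSION
SELECT a, b AS x FROM order_by_all ORDER BY ALL;   -- workaround from the commit message: alias the column to something other than 'all'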
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index cb1e94305fb..a2c719606d8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1214,7 +1214,7 @@ private: static void expandGroupByAll(QueryNode & query_tree_node_typed); - void expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings); + void expandOrderByAll(QueryNode & query_tree_node_typed); static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context); @@ -2334,9 +2334,9 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed) query_tree_node_typed.setIsGroupByAll(false); } -void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed, const Settings & settings) +void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) { - if (!settings.enable_order_by_all || !query_tree_node_typed.isOrderByAll()) + if (!query_tree_node_typed.isOrderByAll()) return; auto * all_node = query_tree_node_typed.getOrderBy().getNodes()[0]->as(); @@ -7369,7 +7369,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (settings.enable_positional_arguments) replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); - expandOrderByAll(query_node_typed, settings); + expandOrderByAll(query_node_typed); resolveSortNodeList(query_node_typed.getOrderByNode(), scope); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4bb48cb3a29..fca0554dc99 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -861,7 +861,6 @@ class IColumn; M(UInt64, cache_warmer_threads, 4, "Only available in ClickHouse Cloud", 0) \ M(Int64, ignore_cold_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud", 0) \ - M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ // End of COMMON_SETTINGS @@ -929,6 +928,7 @@ class IColumn; MAKE_OBSOLETE(M, Bool, allow_experimental_undrop_table_query, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_s3queue, true) \ MAKE_OBSOLETE(M, Bool, query_plan_optimize_primary_key, true) \ + MAKE_OBSOLETE(M, Bool, enable_order_by_all, true) \ /** The section above is for obsolete settings. Do not add anything there. 
*/ diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index ecd021328e7..0a260969cd4 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -788,14 +788,21 @@ void expandOrderByAll(ASTSelectQuery * select_query) for (const auto & expr : select_query->select()->children) { if (auto * identifier = expr->as(); identifier != nullptr) - if (Poco::toUpper(identifier->name()) == "ALL" || Poco::toUpper(identifier->alias) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); - + { + if (identifier->alias.empty()) + { + if (Poco::toUpper(identifier->name()) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column with name 'all'"); + } + else + { + if (Poco::toUpper(identifier->alias) == "ALL") + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column alias with name 'all'"); + } + } if (auto * function = expr->as(); function != nullptr) if (Poco::toUpper(function->alias) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort an expression with name 'all'"); auto elem = std::make_shared(); elem->direction = all_elem->direction; @@ -1324,7 +1331,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( expandGroupByAll(select_query); // expand ORDER BY ALL - if (settings.enable_order_by_all && select_query->order_by_all) + if (select_query->order_by_all) expandOrderByAll(select_query); /// Remove unneeded columns according to 'required_result_columns'. diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index 6eed33cc68d..d91f6dfc4a5 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -49,15 +49,14 @@ A 2 2 A 3 B \N C --- what happens if some column "all" already exists? 
-B 3 10 -D 1 20 -A 2 30 -C \N 40 -B 3 10 -D 1 20 -A 2 30 -C \N 40 +-- "ALL" in ORDER BY is case-insensitive +A 2 +B 3 +C \N +D 1 +A 2 +B 3 +C \N D 1 A 2 B 3 @@ -66,14 +65,9 @@ D 1 A 2 B 3 C \N -A 2 -B 3 -D 1 -\N -A 2 -B 3 -D 1 -\N +D 1 +-- If "all" (case-insensitive) appears in the SELECT clause, throw an error because of ambiguity +-- If ORDER BY contains "ALL" plus other columns, then "ALL" loses its special meaning B 3 10 D 1 20 A 2 30 @@ -82,7 +76,7 @@ B 3 10 D 1 20 A 2 30 C \N 40 --- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause +-- test SELECT * ORDER BY ALL (only works if the SELECT column contains no "all" column) A 2 30 B 3 10 C \N 40 diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index 0960d75ad96..f10184e79b9 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -42,43 +42,43 @@ SET allow_experimental_analyzer = 1; SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; -SELECT '-- what happens if some column "all" already exists?'; +SELECT '-- "ALL" in ORDER BY is case-insensitive'; + +SET allow_experimental_analyzer = 0; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT a, b FROM order_by_all ORDER BY all; + +SET allow_experimental_analyzer = 1; +SELECT a, b FROM order_by_all ORDER BY ALL; +SELECT a, b FROM order_by_all ORDER BY all; + +SELECT '-- If "all" (case-insensitive) appears in the SELECT clause, throw an error because of ambiguity'; -- columns SET allow_experimental_analyzer = 0; -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; SET allow_experimental_analyzer = 1; -SELECT a, b, all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b, all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; -- column aliases SET allow_experimental_analyzer = 0; -SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; SET allow_experimental_analyzer = 1; -SELECT a, b AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT a, b AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; -- expressions SET allow_experimental_analyzer = 0; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all SETTINGS enable_order_by_all = false; SET allow_experimental_analyzer = 1; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all; -- { serverError UNEXPECTED_EXPRESSION } SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY all 
SETTINGS enable_order_by_all = false; + +SELECT '-- If ORDER BY contains "ALL" plus other columns, then "ALL" loses its special meaning'; SET allow_experimental_analyzer = 0; SELECT a, b, all FROM order_by_all ORDER BY all, a; @@ -88,7 +88,7 @@ SELECT a, b, all FROM order_by_all ORDER BY all, a; DROP TABLE order_by_all; -SELECT '-- test SELECT * ORDER BY ALL with no "all" column in the SELECT clause'; +SELECT '-- test SELECT * ORDER BY ALL (only works if the SELECT column contains no "all" column)'; CREATE TABLE order_by_all ( @@ -96,7 +96,7 @@ CREATE TABLE order_by_all b Nullable(Int32), c UInt64, ) - ENGINE = Memory; +ENGINE = Memory; INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); From a24849129947f221f377889d75fc44effba63d18 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 2 Feb 2024 11:14:03 +0100 Subject: [PATCH 031/145] remove new settings from older versions --- docker/test/upgrade/run.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index aaba5cc6a8c..af535325119 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -77,6 +77,12 @@ remove_keeper_config "async_replication" "1" # create_if_not_exists feature flag doesn't exist on some older versions remove_keeper_config "create_if_not_exists" "[01]" +# latest_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" + +# commit_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml From e79ddd54afa54dc0c964774899f7250514741004 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Sat, 3 Feb 2024 00:56:37 +0000 Subject: [PATCH 032/145] fix tests --- src/Functions/FunctionBinaryArithmetic.h | 19 +++---------------- .../00700_decimal_arithm.reference | 6 +++--- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 9b0afee5053..4d768311aaf 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -146,7 +146,6 @@ private: /// it's not correct for Decimal public: static constexpr bool allow_decimal = IsOperation::allow_decimal; - static constexpr bool only_integer = IsOperation::div_int || IsOperation::div_int_or_zero; /// Appropriate result type for binary operator on numeric types. "Date" can also mean /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). 
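The user-visible goal of the intDiv/intDivOrZero rework in this and the related commits of this series is that integer division of Decimal arguments returns an integer rather than a Decimal. A hypothetical query sketching that intent (the exact integer width is still being adjusted in later commits, so it is not asserted here):

SELECT intDiv(toDecimal32(4.4, 2), toDecimal64(2.2, 2)) AS q, toTypeName(q); -- q is expected to come back as an integer type, not a Decimal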
@@ -626,7 +625,10 @@ private: if constexpr (op_case == OpCase::RightConstant) { if ((*right_nullmap)[0]) + { + c[0] = ResultType(); return; + } for (size_t i = 0; i < size; ++i) c[i] = apply_func(undec(a[i]), undec(b)); @@ -2094,21 +2096,6 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A DataTypePtr type_res; if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { - if constexpr (is_division) - { - if (context->getSettingsRef().decimal_check_overflow) - { - /// Check overflow by using operands scale (based on big decimal division implementation details): - /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers - /// i.e. int_operand = decimal_operand*10^scale - /// For division, left operand will be scaled by right operand scale also to do big integer division, - /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale - /// So, we can check upfront possible overflow just by checking max scale used for left operand - /// Note: it doesn't detect all possible overflow during big decimal division - if (left.getScale() + right.getScale() > DecimalResultType::maxPrecision()) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); - } - } DecimalResultType result_type = decimalResultType(left, right); type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } diff --git a/tests/queries/0_stateless/00700_decimal_arithm.reference b/tests/queries/0_stateless/00700_decimal_arithm.reference index 20f04696b1b..109c0632fb1 100644 --- a/tests/queries/0_stateless/00700_decimal_arithm.reference +++ b/tests/queries/0_stateless/00700_decimal_arithm.reference @@ -18,10 +18,10 @@ 63 -21 42 882 -882 0 2 0 2 63 -21 42 882 -882 0 2 0 2 1.00305798474369219219752355409390731264 0.16305798474369219219752355409390731264 -1.490591730234615865843651857942052864 -1.38847100762815390390123822295304634368 1.38847100762815390390123822295304634368 -0.00000000000000000000000000000000000001 0.00000000000000000000000000000000000001 -63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0 2 +63.42 -21.42 41.58 890.82 -890.82 0.495 1.98 0 1 63.42 -21.42 41.58 890.82 -890.82 -63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0 2 -63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0 2 +63.42 -21.42 41.58 890.82 -890.82 0.495049504950495049 1.980198019801980198 0 1 +63.42 -21.42 41.58 890.82 -890.82 0.49 1.98 0 1 -42 42 42 42 0.42 0.42 0.42 42.42 42.42 42.42 0 0 0 0 0 0 0 0 0 0 42 -42 -42 -42 -0.42 -0.42 -0.42 -42.42 -42.42 -42.42 From 9e1a3c7c24b6499babac7dd06383af8872fc9fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Sun, 4 Feb 2024 14:25:04 +0800 Subject: [PATCH 033/145] Update src/AggregateFunctions/AggregateFunctionSum.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Raúl Marín --- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index b3ba7cc7f57..53ddec597b8 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -161,7 +161,7 @@ struct AggregateFunctionSumData Impl::add(sum, local_sum); return; } - else if constexpr (is_integer || is_decimal) + else if constexpr (is_over_big_int) { /// Use a mask to discard the value if it is null T local_sum{}; 
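For context on these AggregateFunctionSum changes: the comment above describes the multiply-by-0-or-1 trick used for narrow types, while big integers and decimals take the mask path introduced a few commits earlier and refined in the next two, where masks[0] is all zero bits and masks[1] (that is, -1) is all one bits, so ANDing a value with the selected mask either zeroes it or keeps it unchanged without a data-dependent branch. A hedged SQL illustration of the mask idea and of the query shape this path serves (the sumIf query and its expected result 49500 match the unit tests added later in this series):

SELECT bitAnd(toInt64(42), toInt64(0)), bitAnd(toInt64(42), toInt64(-1)); -- 0 and 42: an all-zero mask drops the value, an all-ones mask keeps it
SELECT sumIf(number::Decimal128(3), number % 10 == 0) FROM numbers(1000); -- 49500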
From d68a0e7b3e57b1b1471bd02531ebb0cadebf897e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Sun, 4 Feb 2024 14:25:14 +0800 Subject: [PATCH 034/145] Update src/AggregateFunctions/AggregateFunctionSum.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Raúl Marín --- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 53ddec597b8..ac9e77c8a33 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -146,7 +146,7 @@ struct AggregateFunctionSumData size_t count = end - start; const auto * end_ptr = ptr + count; - if constexpr ((is_integer || is_decimal)&&!is_over_big_int) + if constexpr ((is_integer || is_decimal) && !is_over_big_int) { /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null) /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I From be19fb9935d0810f25cba17c96fe58de8eb85012 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 4 Feb 2024 14:39:55 +0800 Subject: [PATCH 035/145] change as request --- src/AggregateFunctions/AggregateFunctionSum.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index ac9e77c8a33..58aaddf357a 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -163,19 +163,20 @@ struct AggregateFunctionSumData } else if constexpr (is_over_big_int) { - /// Use a mask to discard the value if it is null - T local_sum{}; + /// Use a mask to discard or keep the value to reduce branch miss. + /// Notice that for (U)Int128 or Decimal128, MaskType is Int8 instead of Int64, otherwise extra branches will be introduced by compiler (for unknown reason) and performance will be worse. 
using MaskType = std::conditional_t; alignas(64) const MaskType masks[2] = {0, -1}; + T local_sum{}; while (ptr < end_ptr) { Value v = *ptr; if constexpr (!add_if_zero) { if constexpr (is_integer) - v &= masks[*condition_map]; + v &= masks[!!*condition_map]; else - v.value &= masks[*condition_map]; + v.value &= masks[!!*condition_map]; } else { From 549b77021d3c448cf8802c7923ca03c0bf9a2781 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 4 Feb 2024 15:55:22 +0800 Subject: [PATCH 036/145] add some perf tests --- tests/performance/sum.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/performance/sum.xml b/tests/performance/sum.xml index 57b879a360d..36b898436bf 100644 --- a/tests/performance/sum.xml +++ b/tests/performance/sum.xml @@ -17,6 +17,13 @@ SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000) SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000) + select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000) + select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000) + CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory INSERT INTO nullfloat32 From 7ad48c2aa219ef1a79c5fcc593097d6e0d5d95fc Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 5 Feb 2024 17:02:16 +0100 Subject: [PATCH 037/145] Fix --- src/Coordination/Changelog.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index e61bcc5f163..d7152f350f7 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -4,8 +4,6 @@ #include #include -#include - #include #include @@ -21,7 +19,7 @@ namespace Poco class Logger; } -using LoggerPtr = boost::intrusive_ptr; +using LoggerPtr = std::shared_ptr; namespace DB { From 41202cd7b2f146e3b13f5d0279cb6439bc250f10 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 6 Feb 2024 09:39:22 +0100 Subject: [PATCH 038/145] fix upgrade --- docker/test/upgrade/run.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index af535325119..1aecc7331cd 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -115,6 +115,12 @@ remove_keeper_config "async_replication" "1" # create_if_not_exists feature flag doesn't exist on some older versions remove_keeper_config "create_if_not_exists" "[01]" +# latest_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+" + +# commit_logs_cache_size_threshold setting doesn't exist on some older versions +remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+" + # But we still need default disk because some tables loaded only into it sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ | sed "s|
s3
|
s3
default|" \ From 8733a9634a4342d57c7b7ae8a9ba8bc877ea76fd Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 7 Feb 2024 11:24:27 +0800 Subject: [PATCH 039/145] add uts --- .../0_stateless/02985_if_over_big_int_decimal.reference | 6 ++++++ tests/queries/0_stateless/02985_if_over_big_int_decimal.sql | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02985_if_over_big_int_decimal.reference create mode 100644 tests/queries/0_stateless/02985_if_over_big_int_decimal.sql diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference new file mode 100644 index 00000000000..055103ad134 --- /dev/null +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference @@ -0,0 +1,6 @@ +49500 +49500 +49500 +49500 +49500 +49500 diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql new file mode 100644 index 00000000000..6868524d195 --- /dev/null +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql @@ -0,0 +1,6 @@ +select sumIf(number::Int128, number % 10 == 0) from numbers(1000); +select sumIf(number::UInt128, number % 10 == 0) from numbers(1000); +select sumIf(number::Int256, number % 10 == 0) from numbers(1000); +select sumIf(number::UInt256, number % 10 == 0) from numbers(1000); +select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000); +select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000); From e55b60e05c8f564738d92bfa35f68a378732f690 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 8 Feb 2024 17:40:41 +0100 Subject: [PATCH 040/145] Fix --- src/Storages/RabbitMQ/RabbitMQConsumer.cpp | 28 +++++++++++++- src/Storages/RabbitMQ/RabbitMQConsumer.h | 3 ++ src/Storages/RabbitMQ/RabbitMQSource.cpp | 21 ++++++++--- src/Storages/RabbitMQ/RabbitMQSource.h | 1 + src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 37 ++++++++++++++----- .../integration/test_storage_rabbitmq/test.py | 3 ++ 6 files changed, 77 insertions(+), 16 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp index 1843bebe3c7..28dc239ae37 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.cpp @@ -128,6 +128,32 @@ bool RabbitMQConsumer::ackMessages(const CommitInfo & commit_info) return false; } +bool RabbitMQConsumer::nackMessages(const CommitInfo & commit_info) +{ + if (state != State::OK) + return false; + + /// Nothing to nack. 
+ if (!commit_info.delivery_tag || commit_info.delivery_tag <= last_commited_delivery_tag) + return false; + + if (consumer_channel->reject(commit_info.delivery_tag, AMQP::multiple)) + { + LOG_TRACE( + log, "Consumer rejected messages with deliveryTags from {} to {} on channel {}", + last_commited_delivery_tag, commit_info.delivery_tag, channel_id); + + return true; + } + + LOG_ERROR( + log, + "Failed to reject messages for {}:{}, (current commit point {}:{})", + commit_info.channel_id, commit_info.delivery_tag, + channel_id, last_commited_delivery_tag); + + return false; +} void RabbitMQConsumer::updateChannel(RabbitMQConnection & connection) { @@ -161,7 +187,7 @@ void RabbitMQConsumer::updateChannel(RabbitMQConnection & connection) consumer_channel->onError([&](const char * message) { - LOG_ERROR(log, "Channel {} in an error state: {}", channel_id, message); + LOG_ERROR(log, "Channel {} in in error state: {}", channel_id, message); state = State::ERROR; }); } diff --git a/src/Storages/RabbitMQ/RabbitMQConsumer.h b/src/Storages/RabbitMQ/RabbitMQConsumer.h index c78b33bfc7c..9dad175dda3 100644 --- a/src/Storages/RabbitMQ/RabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/RabbitMQConsumer.h @@ -50,7 +50,9 @@ public: UInt64 delivery_tag = 0; String channel_id; }; + const MessageData & currentMessage() { return current; } + const String & getChannelID() const { return channel_id; } /// Return read buffer containing next available message /// or nullptr if there are no messages to process. @@ -63,6 +65,7 @@ public: bool isConsumerStopped() const { return stopped.load(); } bool ackMessages(const CommitInfo & commit_info); + bool nackMessages(const CommitInfo & commit_info); bool hasPendingMessages() { return !received.empty(); } diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 3cec448fc11..6c50d440373 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -120,10 +120,20 @@ Chunk RabbitMQSource::generateImpl() { auto timeout = std::chrono::milliseconds(context->getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); consumer = storage.popConsumer(timeout); + + if (consumer->needChannelUpdate()) + { + LOG_TRACE(log, "Channel {} is in error state, will update", consumer->getChannelID()); + consumer->updateChannel(storage.getConnection()); + } } if (is_finished || !consumer || consumer->isConsumerStopped()) + { + LOG_TRACE(log, "RabbitMQSource is stopped (is_finished: {}, consumer_stopped: {})", + is_finished, consumer ? toString(consumer->isConsumerStopped()) : "No consumer"); return {}; + } /// Currently it is one time usage source: to make sure data is flushed /// strictly by timeout or by block size. 
@@ -254,13 +264,12 @@ Chunk RabbitMQSource::generateImpl() bool RabbitMQSource::sendAck() { - if (!consumer) - return false; + return consumer && consumer->ackMessages(commit_info); +} - if (!consumer->ackMessages(commit_info)) - return false; - - return true; +bool RabbitMQSource::sendNack() +{ + return consumer && consumer->nackMessages(commit_info); } } diff --git a/src/Storages/RabbitMQ/RabbitMQSource.h b/src/Storages/RabbitMQ/RabbitMQSource.h index 21d059bfae2..0d6fad97054 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.h +++ b/src/Storages/RabbitMQ/RabbitMQSource.h @@ -33,6 +33,7 @@ public: bool needChannelUpdate(); void updateChannel(); bool sendAck(); + bool sendNack(); private: StorageRabbitMQ & storage; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 868f48d0b7d..880602bf272 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1061,7 +1061,8 @@ bool StorageRabbitMQ::tryStreamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { auto source = std::make_shared( - *this, storage_snapshot, rabbitmq_context, column_names, block_size, max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, false); + *this, storage_snapshot, rabbitmq_context, column_names, block_size, + max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, false); sources.emplace_back(source); pipes.emplace_back(source); @@ -1069,13 +1070,25 @@ bool StorageRabbitMQ::tryStreamToViews() block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); + std::atomic_size_t rows = 0; + block_io.pipeline.setProgressCallback([&](const Progress & progress) { rows += progress.read_rows.load(); }); + if (!connection->getHandler().loopRunning()) startLoop(); + bool write_failed = false; + try { CompletedPipelineExecutor executor(block_io.pipeline); executor.execute(); } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + write_failed = true; + } + + LOG_TRACE(log, "Processed {} rows", rows); /* Note: sending ack() with loop running in another thread will lead to a lot of data races inside the library, but only in case * error occurs or connection is lost while ack is being sent @@ -1083,13 +1096,6 @@ bool StorageRabbitMQ::tryStreamToViews() deactivateTask(looping_task, false, true); size_t queue_empty = 0; - if (!hasDependencies(getStorageID())) - { - /// Do not commit to rabbitmq if the dependency was removed. - LOG_TRACE(log, "No dependencies, reschedule"); - return false; - } - if (!connection->isConnected()) { if (shutdown_called) @@ -1130,7 +1136,7 @@ bool StorageRabbitMQ::tryStreamToViews() * the same channel will also commit all previously not-committed messages. Anyway I do not think that for ack frame this * will ever happen. */ - if (!source->sendAck()) + if (write_failed ? source->sendNack() : source->sendAck()) { /// Iterate loop to activate error callbacks if they happened connection->getHandler().iterateLoop(); @@ -1142,6 +1148,19 @@ bool StorageRabbitMQ::tryStreamToViews() } } + if (write_failed) + { + LOG_TRACE(log, "Write failed, reschedule"); + return false; + } + + if (!hasDependencies(getStorageID())) + { + /// Do not commit to rabbitmq if the dependency was removed. 
+ LOG_TRACE(log, "No dependencies, reschedule"); + return false; + } + if ((queue_empty == num_created_consumers) && (++read_attempts == MAX_FAILED_READ_ATTEMPTS)) { connection->heartbeat(); diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index b778e9fb556..d129543d68f 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -3549,3 +3549,6 @@ def test_attach_broken_table(rabbitmq_cluster): assert "CANNOT_CONNECT_RABBITMQ" in error error = instance.query_and_get_error("INSERT INTO rabbit_queue VALUES ('test')") assert "CANNOT_CONNECT_RABBITMQ" in error + + +# TODO: add a test From a6fe66dd57b59a15c4881b9cfebf1487b53e66a8 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Feb 2024 17:56:20 +0000 Subject: [PATCH 041/145] Update tests --- .../0_stateless/00752_low_cardinality_lambda_argument.sql | 1 + .../0_stateless/00752_low_cardinality_left_array_join.sql | 1 + tests/queries/0_stateless/00945_bloom_filter_index.sql | 1 + tests/queries/0_stateless/01414_low_cardinality_nullable.sql | 2 ++ tests/queries/0_stateless/01441_low_cardinality_array_index.sql | 2 ++ tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 1 + tests/queries/0_stateless/02184_nested_tuple.sql | 1 + 7 files changed, 9 insertions(+) diff --git a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql index a4bdbd5653c..998ff2f54d3 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists lc_lambda; create table lc_lambda (arr Array(LowCardinality(UInt64))) engine = Memory; insert into lc_lambda select range(number) from system.numbers limit 10; diff --git a/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql b/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql index 1c19700e34d..2d65f01a1b9 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_left_array_join.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists lc_left_aj; CREATE TABLE lc_left_aj ( diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index dc47e858c4d..faa7feda04d 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -1,3 +1,4 @@ +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS single_column_bloom_filter; diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 2d3d31e9b5c..cd5111faf45 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -1,3 +1,5 @@ +SET allow_suspicious_low_cardinality_types=1; + DROP TABLE IF EXISTS lc_nullable; CREATE TABLE lc_nullable ( diff --git a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql index 4b31a86edfb..b5e14c957c6 100644 --- a/tests/queries/0_stateless/01441_low_cardinality_array_index.sql +++ b/tests/queries/0_stateless/01441_low_cardinality_array_index.sql @@ -1,3 +1,5 
@@ +SET allow_suspicious_low_cardinality_types=1; + DROP TABLE IF EXISTS t_01411; CREATE TABLE t_01411( diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index 22532529812..d405bb01fd9 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,3 +1,4 @@ +set allow_suspicious_low_cardinality_types=1; drop table if exists perf_lc_num; CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; diff --git a/tests/queries/0_stateless/02184_nested_tuple.sql b/tests/queries/0_stateless/02184_nested_tuple.sql index 67a20e3dce1..09ed8eb7200 100644 --- a/tests/queries/0_stateless/02184_nested_tuple.sql +++ b/tests/queries/0_stateless/02184_nested_tuple.sql @@ -1,3 +1,4 @@ +SET allow_suspicious_low_cardinality_types=1; DROP TABLE IF EXISTS t_nested_tuple; CREATE TABLE t_nested_tuple From 73cf923033fb4feec9653fd6d2bd7111c652baeb Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Feb 2024 18:03:00 +0000 Subject: [PATCH 042/145] Update test --- .../0_stateless/02500_numbers_inference.sh | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02500_numbers_inference.sh b/tests/queries/0_stateless/02500_numbers_inference.sh index ce9cd5bdc9f..5d863bd616f 100755 --- a/tests/queries/0_stateless/02500_numbers_inference.sh +++ b/tests/queries/0_stateless/02500_numbers_inference.sh @@ -8,10 +8,10 @@ $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1.2}')"; echo '{"x" : 1.2}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1}')"; echo '{"x" : 1}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')"; -echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')"; -echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')" --input_format_try_infer_exponent_floats=1; +echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')" --input_format_try_infer_exponent_floats=1; +echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; $CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, false]}')"; echo '{"x" : [1, 42.42, false]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; @@ -19,10 +19,10 @@ $CLICKHOUSE_LOCAL -q "desc format(TSV, '1.2')"; echo '1.2' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; $CLICKHOUSE_LOCAL -q "desc format(TSV, '1')"; echo '1' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')"; -echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; -$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')"; -echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc 
test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')" --input_format_try_infer_exponent_floats=1; +echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')" --input_format_try_infer_exponent_floats=1; +echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test" --input_format_try_infer_exponent_floats=1; $CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, false]')"; echo '[1, 42.42, false]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; From ed5591e68cce58624bab515286a128352213377e Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 13 Feb 2024 18:20:57 +0000 Subject: [PATCH 043/145] changes due to review --- src/Functions/FunctionBinaryArithmetic.h | 78 ++++++++++-------------- 1 file changed, 31 insertions(+), 47 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 9b0afee5053..d2e74b0cb71 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -148,14 +148,7 @@ public: static constexpr bool allow_decimal = IsOperation::allow_decimal; static constexpr bool only_integer = IsOperation::div_int || IsOperation::div_int_or_zero; - /// Appropriate result type for binary operator on numeric types. "Date" can also mean - /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). - using ResultDataType = Switch< - /// Result must be Integer - Case::div_int || IsOperation::div_int_or_zero, DataTypeFromFieldType>, - - /// Decimal cases - Case || IsDataTypeDecimal), InvalidType>, + using DecimalResultType = Switch< Case< IsDataTypeDecimal && IsDataTypeDecimal && UseLeftDecimal, LeftDataType>, @@ -163,6 +156,18 @@ public: Case && IsIntegralOrExtended, LeftDataType>, Case && IsIntegralOrExtended, RightDataType>, + /// Decimal Real is not supported (traditional DBs convert Decimal Real to Real) + Case && !IsIntegralOrExtendedOrDecimal, InvalidType>, + Case && !IsIntegralOrExtendedOrDecimal, InvalidType>>; /// Determine result decimal type as it would be with usual division (as we determine BinaryOperationTraits::ResultType) + + /// Appropriate result type for binary operator on numeric types. "Date" can also mean + /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). 
+ using ResultDataType = Switch< + /// Result must be Integer + Case::div_int || IsOperation::div_int_or_zero, DataTypeFromFieldType>, + /// Decimal cases + Case || IsDataTypeDecimal, DecimalResultType>, + /// e.g Decimal +-*/ Float, least(Decimal, Float), greatest(Decimal, Float) = Float64 Case::allow_decimal && IsDataTypeDecimal && IsFloatingPoint, DataTypeFloat64>, Case::allow_decimal && IsDataTypeDecimal && IsFloatingPoint, DataTypeFloat64>, @@ -1669,26 +1674,23 @@ public: if constexpr (!std::is_same_v) { + if constexpr (is_div_int || is_div_int_or_zero) + type_res = std::make_shared(); if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { - if constexpr (is_div_int || is_div_int_or_zero) - type_res = std::make_shared(); - else + if constexpr (is_division) { - if constexpr (is_division) + if (context->getSettingsRef().decimal_check_overflow) { - if (context->getSettingsRef().decimal_check_overflow) - { - /// Check overflow by using operands scale (based on big decimal division implementation details): - /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers - /// i.e. int_operand = decimal_operand*10^scale - /// For division, left operand will be scaled by right operand scale also to do big integer division, - /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale - /// So, we can check upfront possible overflow just by checking max scale used for left operand - /// Note: it doesn't detect all possible overflow during big decimal division - if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) - throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); - } + /// Check overflow by using operands scale (based on big decimal division implementation details): + /// big decimal arithmetic is based on big integers, decimal operands are converted to big integers + /// i.e. 
int_operand = decimal_operand*10^scale + /// For division, left operand will be scaled by right operand scale also to do big integer division, + /// BigInt result = left*10^(left_scale + right_scale) / right * 10^right_scale + /// So, we can check upfront possible overflow just by checking max scale used for left operand + /// Note: it doesn't detect all possible overflow during big decimal division + if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); } ResultDataType result_type = decimalResultType(left, right); type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); @@ -1697,24 +1699,15 @@ public: else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || (IsDataTypeDecimal && IsFloatingPoint))) { - if constexpr (is_div_int || is_div_int_or_zero) - type_res = std::make_shared(); - else - type_res = std::make_shared(); + type_res = std::make_shared(); } else if constexpr (IsDataTypeDecimal) { - if constexpr (is_div_int || is_div_int_or_zero) - type_res = std::make_shared(); - else - type_res = std::make_shared(left.getPrecision(), left.getScale()); + type_res = std::make_shared(left.getPrecision(), left.getScale()); } else if constexpr (IsDataTypeDecimal) { - if constexpr (is_div_int || is_div_int_or_zero) - type_res = std::make_shared(); - else - type_res = std::make_shared(right.getPrecision(), right.getScale()); + type_res = std::make_shared(right.getPrecision(), right.getScale()); } else if constexpr (std::is_same_v) { @@ -2024,6 +2017,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A using LeftDataType = std::decay_t; using RightDataType = std::decay_t; using ResultDataType = typename BinaryOperationTraits::ResultDataType; + using DecimalResultType = typename BinaryOperationTraits::DecimalResultType; if constexpr (std::is_same_v) return nullptr; @@ -2075,19 +2069,9 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A col_left_size, right_nullmap); } + /// Here we check if we have `intDiv` or `intDivOrZero` and at least one of the arguments is decimal, because in this case originally we had result as decimal, so we need to convert result into integer after calculations else if constexpr (!decimal_with_float && (is_div_int || is_div_int_or_zero) && (IsDataTypeDecimal || IsDataTypeDecimal)) { - using DecimalResultType = Switch< - Case< - IsDataTypeDecimal && IsDataTypeDecimal && UseLeftDecimal, - LeftDataType>, - Case && IsDataTypeDecimal, RightDataType>, - Case && IsIntegralOrExtended, LeftDataType>, - Case && IsIntegralOrExtended, RightDataType>, - - /// Decimal Real is not supported (traditional DBs convert Decimal Real to Real) - Case && !IsIntegralOrExtendedOrDecimal, InvalidType>, - Case && !IsIntegralOrExtendedOrDecimal, InvalidType>>; /// Determine result decimal type as it would be with usual division (as we determine BinaryOperationTraits::ResultType) if constexpr (!std::is_same_v) { From ac601a0cb9a8a81e0b690df317b9c7a98f17e1ad Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 13 Feb 2024 23:12:21 +0000 Subject: [PATCH 044/145] fix build --- src/Functions/FunctionBinaryArithmetic.h | 31 +++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 667854a622b..fa2ff8fc275 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ 
b/src/Functions/FunctionBinaryArithmetic.h @@ -147,17 +147,24 @@ private: /// it's not correct for Decimal public: static constexpr bool allow_decimal = IsOperation::allow_decimal; - using DecimalResultType = Switch< - Case< - IsDataTypeDecimal && IsDataTypeDecimal && UseLeftDecimal, - LeftDataType>, + using DecimalResultDataType = Switch< + Case, + Case && IsDataTypeDecimal && UseLeftDecimal, LeftDataType>, Case && IsDataTypeDecimal, RightDataType>, Case && IsIntegralOrExtended, LeftDataType>, Case && IsIntegralOrExtended, RightDataType>, - /// Decimal Real is not supported (traditional DBs convert Decimal Real to Real) + /// e.g Decimal +-*/ Float, least(Decimal, Float), greatest(Decimal, Float) = Float64 + Case && IsFloatingPoint, DataTypeFloat64>, + Case && IsFloatingPoint, DataTypeFloat64>, + + Case::bit_hamming_distance && IsIntegral && IsIntegral, DataTypeUInt8>, + Case::bit_hamming_distance && IsFixedString && IsFixedString, DataTypeUInt16>, + Case::bit_hamming_distance && IsString && IsString, DataTypeUInt64>, + + /// Decimal Real is not supported (traditional DBs convert Decimal Real to Real) Case && !IsIntegralOrExtendedOrDecimal, InvalidType>, - Case && !IsIntegralOrExtendedOrDecimal, InvalidType>>; /// Determine result decimal type as it would be with usual division (as we determine BinaryOperationTraits::ResultType) + Case && !IsIntegralOrExtendedOrDecimal, InvalidType>>; /// Appropriate result type for binary operator on numeric types. "Date" can also mean /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). @@ -165,7 +172,13 @@ public: /// Result must be Integer Case::div_int || IsOperation::div_int_or_zero, DataTypeFromFieldType>, /// Decimal cases - Case || IsDataTypeDecimal, DecimalResultType>, + Case || IsDataTypeDecimal, DecimalResultDataType>, + Case< + IsDataTypeDecimal && IsDataTypeDecimal && UseLeftDecimal, + LeftDataType>, + Case && IsDataTypeDecimal, RightDataType>, + Case && IsIntegralOrExtended, LeftDataType>, + Case && IsIntegralOrExtended, RightDataType>, /// e.g Decimal +-*/ Float, least(Decimal, Float), greatest(Decimal, Float) = Float64 Case::allow_decimal && IsDataTypeDecimal && IsFloatingPoint, DataTypeFloat64>, @@ -1678,7 +1691,7 @@ public: { if constexpr (is_div_int || is_div_int_or_zero) type_res = std::make_shared(); - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + else if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) { if constexpr (is_division) { @@ -2019,7 +2032,7 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A using LeftDataType = std::decay_t; using RightDataType = std::decay_t; using ResultDataType = typename BinaryOperationTraits::ResultDataType; - using DecimalResultType = typename BinaryOperationTraits::DecimalResultType; + using DecimalResultType = typename BinaryOperationTraits::DecimalResultDataType; if constexpr (std::is_same_v) return nullptr; From 59d8912f6361abc0df81e385cb1bf0f4e7cf42d1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Feb 2024 12:20:11 +0100 Subject: [PATCH 045/145] Update 02327_try_infer_integers_schema_inference test --- .../0_stateless/02327_try_infer_integers_schema_inference.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql index 0ceed178865..a4a69f4fa40 100644 --- 
a/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql +++ b/tests/queries/0_stateless/02327_try_infer_integers_schema_inference.sql @@ -1,6 +1,7 @@ -- Tags: no-fasttest set input_format_try_infer_integers=1; +set input_format_try_infer_exponent_floats=1; select 'JSONEachRow'; desc format(JSONEachRow, '{"x" : 123}'); From 64779835fa90f9cb178084657f24330bca7c9506 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 14 Feb 2024 12:48:05 +0000 Subject: [PATCH 046/145] Update tests --- tests/performance/array_index_low_cardinality_numbers.xml | 4 ++++ tests/queries/0_stateless/02235_remote_fs_cache_stress.sh | 2 +- tests/queries/0_stateless/02735_parquet_encoder.sql | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/performance/array_index_low_cardinality_numbers.xml b/tests/performance/array_index_low_cardinality_numbers.xml index f8fa27df05f..a68a37ebfff 100644 --- a/tests/performance/array_index_low_cardinality_numbers.xml +++ b/tests/performance/array_index_low_cardinality_numbers.xml @@ -1,4 +1,8 @@ + + 1 + + DROP TABLE IF EXISTS perf_lc_num CREATE TABLE perf_lc_num( num UInt8, diff --git a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh index bc1a4cbfdd1..0b6b9f461b0 100755 --- a/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh +++ b/tests/queries/0_stateless/02235_remote_fs_cache_stress.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --multiquery --multiline --query=""" +${CLICKHOUSE_CLIENT} --allow_suspicious_low_cardinality_types=1 --multiquery --multiline --query=""" DROP TABLE IF EXISTS t_01411; DROP TABLE IF EXISTS t_01411_num; diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index 19125abf8da..fe45a2a317d 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -6,6 +6,7 @@ set output_format_parquet_data_page_size = 800; set output_format_parquet_batch_size = 100; set output_format_parquet_row_group_size_bytes = 1000000000; set engine_file_truncate_on_insert=1; +set allow_suspicious_low_cardinality_types=1; -- Write random data to parquet file, then read from it and check that it matches what we wrote. 
-- Do this for all kinds of data types: primitive, Nullable(primitive), Array(primitive), From 41304c3e11934e105dbb82dd805bc4414ab952ac Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:08:20 +0100 Subject: [PATCH 047/145] Update src/Functions/FunctionBinaryArithmetic.h Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Functions/FunctionBinaryArithmetic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index fa2ff8fc275..967c2b73881 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -644,7 +644,8 @@ private: { if ((*right_nullmap)[0]) { - c[0] = ResultType(); + for (size_t i = 0; i < size; ++i) + c[i] = ResultType(); return; } From 40d4fc1c24e41639230030ae232bc43aec705bac Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Feb 2024 12:11:45 +0100 Subject: [PATCH 048/145] More resilient Disk related operations --- programs/keeper-converter/KeeperConverter.cpp | 3 +- src/Coordination/Changelog.cpp | 209 ++++++++++++------ src/Coordination/CoordinationSettings.h | 5 +- .../KeeperAsynchronousMetrics.cpp | 8 +- src/Coordination/KeeperContext.cpp | 20 +- src/Coordination/KeeperContext.h | 19 +- src/Coordination/KeeperDispatcher.cpp | 2 +- src/Coordination/KeeperServer.cpp | 19 +- src/Coordination/KeeperServer.h | 2 - src/Coordination/KeeperSnapshotManager.cpp | 75 ++++++- src/Coordination/KeeperStateMachine.cpp | 12 +- src/Coordination/KeeperStateMachine.h | 3 - src/Coordination/KeeperStateManager.cpp | 21 +- src/Coordination/KeeperStateManager.h | 1 - src/Coordination/tests/gtest_coordination.cpp | 46 ++-- utils/keeper-data-dumper/main.cpp | 4 +- 16 files changed, 300 insertions(+), 149 deletions(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index 92bdea28738..8cd50d0892f 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -39,7 +40,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) try { - auto keeper_context = std::make_shared(true); + auto keeper_context = std::make_shared(true, std::make_shared()); keeper_context->setDigestEnabled(true); keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 5a58932606e..40ece0e7d2e 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -35,21 +37,86 @@ namespace constexpr std::string_view tmp_prefix = "tmp_"; -void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to) +void moveFileBetweenDisks( + DiskPtr disk_from, + ChangelogFileDescriptionPtr description, + DiskPtr disk_to, + const std::string & path_to, + const KeeperContextPtr & keeper_context) { + auto logger = getLogger("Changelog"); + LOG_TRACE(logger, "Moving {} to {} from disk {} to disk {}", description->path, path_to, disk_from->getName(), disk_to->getName()); /// we use empty file with prefix tmp_ to detect incomplete copies /// if a copy is complete we don't care from which disk we use the same file /// so it's 
okay if a failure happens after removing of tmp file but before we remove /// the changelog from the source disk auto from_path = fs::path(description->path); auto tmp_changelog_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string()); + + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + auto max_retries_on_init = coordination_settings->disk_move_retries_during_init.value; + auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms); + auto run_with_retries = [&](const auto & op, std::string_view operation_description) { - auto buf = disk_to->writeFile(tmp_changelog_name); - buf->finalize(); + /// we limit the amount of retries during initialization phase because shutdown won't be set + /// before initialization is done, i.e. we would be stuck in infinite loop + size_t retry_num = 0; + do + { + try + { + op(); + return true; + } + catch (...) + { + tryLogCurrentException( + logger, + fmt::format( + "While moving changelog {} to disk {} and running '{}'", + description->path, + disk_to->getName(), + operation_description)); + std::this_thread::sleep_for(retries_sleep); + } + + ++retry_num; + if (keeper_context->getServerState() == KeeperContext::Phase::INIT && retry_num == max_retries_on_init) + { + LOG_ERROR(logger, "Operation '{}' failed too many times", operation_description); + break; + } + + } while (!keeper_context->isShutdownCalled()); + + LOG_ERROR( + getLogger("Changelog"), + "Failed to run '{}' while moving changelog {} to disk {}", + operation_description, + description->path, + disk_to->getName()); + return false; + }; + + std::array, std::string_view>, 4> operations{ + std::pair{ + [&] + { + auto buf = disk_to->writeFile(tmp_changelog_name); + buf->finalize(); + }, + "creating temporary file"}, + std::pair{[&] { disk_from->copyFile(from_path, *disk_to, path_to, {}); }, "copying file"}, + std::pair{[&] { disk_to->removeFileIfExists(tmp_changelog_name); }, "removing temporary file"}, + std::pair{[&] { disk_from->removeFileIfExists(description->path); }, "removing changelog file from source disk"}, + }; + + for (const auto & [op, operation_description] : operations) + { + if (!run_with_retries(op, operation_description)) + return; } - disk_from->copyFile(from_path, *disk_to, path_to, {}); - disk_to->removeFile(tmp_changelog_name); - disk_from->removeFile(description->path); + description->path = path_to; description->disk = disk_to; } @@ -173,7 +240,7 @@ public: } else { - moveFileBetweenDisks(log_disk, current_file_description, disk, new_path); + moveFileBetweenDisks(log_disk, current_file_description, disk, new_path, keeper_context); } } } @@ -196,7 +263,7 @@ public: } catch (...) 
{ - tryLogCurrentException(log); + tryLogCurrentException(log, "While setting new changelog file"); throw; } } @@ -813,7 +880,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin auto disk = getDisk(); if (latest_log_disk != disk && latest_log_disk == description->disk) - moveFileBetweenDisks(latest_log_disk, description, disk, description->path); + moveFileBetweenDisks(latest_log_disk, description, disk, description->path, keeper_context); }; /// we can have empty log (with zero entries) and last_log_read_result will be initialized @@ -899,7 +966,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin } if (description->disk != disk) - moveFileBetweenDisks(description->disk, description, disk, description->path); + moveFileBetweenDisks(description->disk, description, disk, description->path, keeper_context); } @@ -921,7 +988,7 @@ void Changelog::initWriter(ChangelogFileDescriptionPtr description) auto log_disk = description->disk; auto latest_log_disk = getLatestLogDisk(); if (log_disk != latest_log_disk) - moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path); + moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); current_writer->setFile(std::move(description), WriteMode::Append); } @@ -984,11 +1051,11 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end) catch (const DB::Exception & e) { if (e.code() == DB::ErrorCodes::NOT_IMPLEMENTED) - moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path); + moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); } } else - moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path); + moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); itr = existing_changelogs.erase(itr); } @@ -1085,70 +1152,78 @@ void Changelog::writeThread() LOG_WARNING(log, "Changelog is shut down"); }; - /// NuRaft writes a batch of request by first calling multiple store requests, i.e. AppendLog - /// finished by a flush request - /// We assume that after some number of appends, we always get flush request - while (true) + try { - if (try_batch_flush) + /// NuRaft writes a batch of request by first calling multiple store requests, i.e. 
AppendLog + /// finished by a flush request + /// We assume that after some number of appends, we always get flush request + while (true) { - try_batch_flush = false; - /// we have Flush request stored in write operation - /// but we try to get new append operations - /// if there are none, we apply the currently set Flush - chassert(std::holds_alternative(write_operation)); - if (!write_operations.tryPop(write_operation)) + if (try_batch_flush) { - chassert(batch_append_ok); - const auto & flush = std::get(write_operation); - flush_logs(flush); - notify_append_completion(); - if (!write_operations.pop(write_operation)) - break; - } - } - else if (!write_operations.pop(write_operation)) - { - break; - } - - assert(initialized); - - if (auto * append_log = std::get_if(&write_operation)) - { - if (!batch_append_ok) - continue; - - std::lock_guard writer_lock(writer_mutex); - assert(current_writer); - - batch_append_ok = current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry)); - ++pending_appends; - } - else - { - const auto & flush = std::get(write_operation); - - if (batch_append_ok) - { - /// we can try batching more logs for flush - if (pending_appends < flush_settings.max_flush_batch_size) + try_batch_flush = false; + /// we have Flush request stored in write operation + /// but we try to get new append operations + /// if there are none, we apply the currently set Flush + chassert(std::holds_alternative(write_operation)); + if (!write_operations.tryPop(write_operation)) { - try_batch_flush = true; - continue; + chassert(batch_append_ok); + const auto & flush = std::get(write_operation); + flush_logs(flush); + notify_append_completion(); + if (!write_operations.pop(write_operation)) + break; } - /// we need to flush because we have maximum allowed pending records - flush_logs(flush); + } + else if (!write_operations.pop(write_operation)) + { + break; + } + + assert(initialized); + + if (auto * append_log = std::get_if(&write_operation)) + { + if (!batch_append_ok) + continue; + + std::lock_guard writer_lock(writer_mutex); + assert(current_writer); + + batch_append_ok = current_writer->appendRecord(buildRecord(append_log->index, append_log->log_entry)); + ++pending_appends; } else { - std::lock_guard lock{durable_idx_mutex}; - *flush.failed = true; + const auto & flush = std::get(write_operation); + + if (batch_append_ok) + { + /// we can try batching more logs for flush + if (pending_appends < flush_settings.max_flush_batch_size) + { + try_batch_flush = true; + continue; + } + /// we need to flush because we have maximum allowed pending records + flush_logs(flush); + } + else + { + std::lock_guard lock{durable_idx_mutex}; + *flush.failed = true; + } + notify_append_completion(); + batch_append_ok = true; } - notify_append_completion(); - batch_append_ok = true; } } + catch (...) 
+ { + tryLogCurrentException(log, "Write thread failed, aborting"); + std::abort(); + } } @@ -1191,7 +1266,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) auto log_disk = description->disk; auto latest_log_disk = getLatestLogDisk(); if (log_disk != latest_log_disk) - moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path); + moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); current_writer->setFile(std::move(description), WriteMode::Append); diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index a58f2b04797..358c6c4097e 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -52,7 +52,10 @@ struct Settings; M(UInt64, log_file_overallocate_size, 50 * 1024 * 1024, "If max_log_file_size is not set to 0, this value will be added to it for preallocating bytes on disk. If a log record is larger than this value, it could lead to uncaught out-of-space issues so a larger value is preferred", 0) \ M(UInt64, min_request_size_for_cache, 50 * 1024, "Minimal size of the request to cache the deserialization result. Caching can have negative effect on latency for smaller requests, set to 0 to disable", 0) \ M(UInt64, raft_limits_reconnect_limit, 50, "If connection to a peer is silent longer than this limit * (multiplied by heartbeat interval), we re-establish the connection.", 0) \ - M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. Settings is disabled by default to not break backwards compatibility.", 0) + M(Bool, async_replication, false, "Enable async replication. All write and read guarantees are preserved while better performance is achieved. 
Settings is disabled by default to not break backwards compatibility.", 0) \ + M(UInt64, disk_move_retries_wait_ms, 1000, "How long to wait between retries after a failure which happened while a file was being moved between disks.", 0) \ + M(UInt64, disk_move_retries_during_init, 100, "The amount of retries after a failure which happened while a file was being moved between disks during initialization.", 0) + DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) diff --git a/src/Coordination/KeeperAsynchronousMetrics.cpp b/src/Coordination/KeeperAsynchronousMetrics.cpp index 8f6e1dec6c1..96d4df39721 100644 --- a/src/Coordination/KeeperAsynchronousMetrics.cpp +++ b/src/Coordination/KeeperAsynchronousMetrics.cpp @@ -20,7 +20,6 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM size_t ephemerals_count = 0; size_t approximate_data_size = 0; size_t key_arena_size = 0; - size_t latest_snapshot_size = 0; size_t open_file_descriptor_count = 0; std::optional max_file_descriptor_count = 0; size_t followers = 0; @@ -46,11 +45,8 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM ephemerals_count = state_machine.getTotalEphemeralNodesCount(); approximate_data_size = state_machine.getApproximateDataSize(); key_arena_size = state_machine.getKeyArenaSize(); - latest_snapshot_size = state_machine.getLatestSnapshotBufSize(); session_with_watches = state_machine.getSessionsWithWatchesCount(); paths_watched = state_machine.getWatchedPathsCount(); - //snapshot_dir_size = keeper_dispatcher.getSnapDirSize(); - //log_dir_size = keeper_dispatcher.getLogDirSize(); # if defined(__linux__) || defined(__APPLE__) open_file_descriptor_count = getCurrentProcessFDCount(); @@ -76,7 +72,9 @@ void updateKeeperInformation(KeeperDispatcher & keeper_dispatcher, AsynchronousM new_values["KeeperApproximateDataSize"] = { approximate_data_size, "The approximate data size of ClickHouse Keeper, in bytes." }; new_values["KeeperKeyArenaSize"] = { key_arena_size, "The size in bytes of the memory arena for keys in ClickHouse Keeper." }; - new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." }; + /// TODO: value was incorrectly set to 0 previously for local snapshots + /// it needs to be fixed and it needs to be atomic to avoid deadlock + ///new_values["KeeperLatestSnapshotSize"] = { latest_snapshot_size, "The uncompressed size in bytes of the latest snapshot created by ClickHouse Keeper." }; new_values["KeeperOpenFileDescriptorCount"] = { open_file_descriptor_count, "The number of open file descriptors in ClickHouse Keeper." 
}; if (max_file_descriptor_count.has_value()) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index baad8d98e6a..14db1ad2a72 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -1,13 +1,15 @@ #include #include -#include -#include -#include -#include #include -#include #include +#include +#include +#include +#include +#include +#include + #include namespace DB @@ -20,9 +22,10 @@ extern const int BAD_ARGUMENTS; } -KeeperContext::KeeperContext(bool standalone_keeper_) +KeeperContext::KeeperContext(bool standalone_keeper_, CoordinationSettingsPtr coordination_settings_) : disk_selector(std::make_shared()) , standalone_keeper(standalone_keeper_) + , coordination_settings(std::move(coordination_settings_)) { /// enable by default some feature flags feature_flags.enableFeatureFlag(KeeperFeatureFlag::FILTERED_LIST); @@ -416,4 +419,9 @@ void KeeperContext::waitLocalLogsPreprocessedOrShutdown() local_logs_preprocessed_cv.wait(lock, [this]{ return shutdown_called || local_logs_preprocessed; }); } +const CoordinationSettingsPtr & KeeperContext::getCoordinationSettings() const +{ + return coordination_settings; +} + } diff --git a/src/Coordination/KeeperContext.h b/src/Coordination/KeeperContext.h index 891bef00446..a5cc2db49a2 100644 --- a/src/Coordination/KeeperContext.h +++ b/src/Coordination/KeeperContext.h @@ -1,7 +1,5 @@ #pragma once #include -#include -#include #include #include #include @@ -12,10 +10,19 @@ namespace DB class KeeperDispatcher; +struct CoordinationSettings; +using CoordinationSettingsPtr = std::shared_ptr; + +class DiskSelector; +class IDisk; +using DiskPtr = std::shared_ptr; + +class WriteBufferFromOwnString; + class KeeperContext { public: - explicit KeeperContext(bool standalone_keeper_); + KeeperContext(bool standalone_keeper_, CoordinationSettingsPtr coordination_settings_); enum class Phase : uint8_t { @@ -68,6 +75,8 @@ public: void waitLocalLogsPreprocessedOrShutdown(); + const CoordinationSettingsPtr & getCoordinationSettings() const; + private: /// local disk defined using path or disk name using Storage = std::variant; @@ -89,7 +98,7 @@ private: std::atomic local_logs_preprocessed = false; std::atomic shutdown_called = false; - Phase server_state{Phase::INIT}; + std::atomic server_state{Phase::INIT}; bool ignore_system_path_on_startup{false}; bool digest_enabled{true}; @@ -113,6 +122,8 @@ private: KeeperDispatcher * dispatcher{nullptr}; std::atomic memory_soft_limit = 0; + + CoordinationSettingsPtr coordination_settings; }; using KeeperContextPtr = std::shared_ptr; diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 35bc953a705..4bd10352d3e 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -414,8 +414,8 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf { LOG_DEBUG(log, "Initializing storage dispatcher"); - keeper_context = std::make_shared(standalone_keeper); configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper); + keeper_context = std::make_shared(standalone_keeper, configuration_and_settings->coordination_settings); keeper_context->initialize(config, this); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 722b1303cc8..e3fd14c0e1a 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -119,20 +119,18 @@ KeeperServer::KeeperServer( 
KeeperSnapshotManagerS3 & snapshot_manager_s3, KeeperStateMachine::CommitCallback commit_callback) : server_id(configuration_and_settings_->server_id) - , coordination_settings(configuration_and_settings_->coordination_settings) , log(getLogger("KeeperServer")) , is_recovering(config.getBool("keeper_server.force_recovery", false)) , keeper_context{std::move(keeper_context_)} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) , enable_reconfiguration(config.getBool("keeper_server.enable_reconfiguration", false)) { - if (coordination_settings->quorum_reads) + if (keeper_context->getCoordinationSettings()->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); state_machine = nuraft::cs_new( responses_queue_, snapshots_queue_, - coordination_settings, keeper_context, config.getBool("keeper_server.upload_snapshot_on_exit", true) ? &snapshot_manager_s3 : nullptr, commit_callback, @@ -143,7 +141,6 @@ KeeperServer::KeeperServer( "keeper_server", "state", config, - coordination_settings, keeper_context); } @@ -226,7 +223,7 @@ void KeeperServer::loadLatestConfig() { auto latest_snapshot_config = state_machine->getClusterConfig(); auto latest_log_store_config = state_manager->getLatestConfigFromLogStore(); - auto async_replication = coordination_settings->async_replication; + auto async_replication = keeper_context->getCoordinationSettings()->async_replication; if (latest_snapshot_config && latest_log_store_config) { @@ -293,6 +290,8 @@ void KeeperServer::forceRecovery() void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6) { + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + nuraft::raft_params params; params.parallel_log_appending_ = true; params.heart_beat_interval_ @@ -427,6 +426,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo { state_machine->init(); + const auto & coordination_settings = keeper_context->getCoordinationSettings(); state_manager->loadLogStore(state_machine->last_commit_index() + 1, coordination_settings->reserved_log_items); auto log_store = state_manager->load_log_store(); @@ -446,7 +446,7 @@ void KeeperServer::startup(const Poco::Util::AbstractConfiguration & config, boo void KeeperServer::shutdownRaftServer() { - size_t timeout = coordination_settings->shutdown_timeout.totalSeconds(); + size_t timeout = keeper_context->getCoordinationSettings()->shutdown_timeout.totalSeconds(); if (!raft_instance) { @@ -870,7 +870,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ /// Node first became leader, and after that some other node became leader. /// BecameFresh for this node will not be called because it was already fresh /// when it was leader. 
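
Throughout this patch, file moves between disks (for changelogs above and for snapshots below) are wrapped in a retry loop driven by the new disk_move_retries_wait_ms and disk_move_retries_during_init settings. A minimal, self-contained sketch of that retry idea, with hypothetical names and without the shutdown checks and logging the real helpers perform, might look like:

#include <chrono>
#include <functional>
#include <thread>

// Illustration only: run an operation, sleep between failed attempts, and give up
// after a bounded number of attempts so startup cannot hang forever.
bool runWithRetries(const std::function<void()> & op, size_t max_attempts, std::chrono::milliseconds sleep_between)
{
    for (size_t attempt = 0; attempt < max_attempts; ++attempt)
    {
        try
        {
            op();
            return true;    // the operation succeeded
        }
        catch (...)
        {
            std::this_thread::sleep_for(sleep_between);    // back off before retrying
        }
    }
    return false;           // all attempts failed, the caller decides what to do next
}

Bounding the attempts during initialization matters because, as the comments in the real helpers note, shutdown is not signalled before initialization finishes, so an unbounded loop there could never be interrupted.
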
- if (leader_index < our_index + coordination_settings->fresh_log_gap) + if (leader_index < our_index + keeper_context->getCoordinationSettings()->fresh_log_gap) set_initialized(); } return nuraft::cb_func::ReturnCode::Ok; @@ -905,7 +905,7 @@ void KeeperServer::waitInit() { std::unique_lock lock(initialized_mutex); - int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); + int64_t timeout = keeper_context->getCoordinationSettings()->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } @@ -977,6 +977,7 @@ KeeperServer::ConfigUpdateState KeeperServer::applyConfigUpdate( ClusterUpdateActions KeeperServer::getRaftConfigurationDiff(const Poco::Util::AbstractConfiguration & config) { + const auto & coordination_settings = keeper_context->getCoordinationSettings(); auto diff = state_manager->getRaftConfigurationDiff(config, coordination_settings); if (!diff.empty()) @@ -1004,6 +1005,7 @@ void KeeperServer::applyConfigUpdateWithReconfigDisabled(const ClusterUpdateActi std::this_thread::sleep_for(sleep_time * (i + 1)); }; + const auto & coordination_settings = keeper_context->getCoordinationSettings(); if (const auto * add = std::get_if(&action)) { for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) @@ -1059,6 +1061,7 @@ bool KeeperServer::waitForConfigUpdateWithReconfigDisabled(const ClusterUpdateAc auto became_leader = [&] { LOG_INFO(log, "Became leader, aborting"); return false; }; auto backoff = [&](size_t i) { std::this_thread::sleep_for(sleep_time * (i + 1)); }; + const auto & coordination_settings = keeper_context->getCoordinationSettings(); if (const auto* add = std::get_if(&action)) { for (size_t i = 0; i < coordination_settings->configuration_change_tries_count && !is_recovering; ++i) diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index ef298df3efc..dd54539a92b 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -22,8 +22,6 @@ class KeeperServer private: const int server_id; - CoordinationSettingsPtr coordination_settings; - nuraft::ptr state_machine; nuraft::ptr state_manager; diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 091571b4a1a..61bcdf023cf 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -34,21 +35,77 @@ namespace { constexpr std::string_view tmp_prefix = "tmp_"; - void moveFileBetweenDisks(DiskPtr disk_from, const std::string & path_from, DiskPtr disk_to, const std::string & path_to) + void moveFileBetweenDisks( + DiskPtr disk_from, + const std::string & path_from, + DiskPtr disk_to, + const std::string & path_to, + const KeeperContextPtr & keeper_context) { + auto logger = getLogger("KeeperSnapshotManager"); + LOG_TRACE(logger, "Moving {} to {} from disk {} to disk {}", path_from, path_to, disk_from->getName(), disk_to->getName()); /// we use empty file with prefix tmp_ to detect incomplete copies /// if a copy is complete we don't care from which disk we use the same file /// so it's okay if a failure happens after removing of tmp file but before we remove /// the snapshot from the source disk auto from_path = fs::path(path_from); auto 
tmp_snapshot_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string()); + + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + auto max_retries_on_init = coordination_settings->disk_move_retries_during_init.value; + auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms); + auto run_with_retries = [&](const auto & op, std::string_view operation_description) { - auto buf = disk_to->writeFile(tmp_snapshot_name); - buf->finalize(); - } - disk_from->copyFile(from_path, *disk_to, path_to, {}); - disk_to->removeFile(tmp_snapshot_name); - disk_from->removeFile(path_from); + size_t retry_num = 0; + do + { + try + { + op(); + return true; + } + catch (...) + { + tryLogCurrentException( + logger, + fmt::format( + "While moving snapshot {} to disk {} and running '{}'", path_from, disk_to->getName(), operation_description)); + std::this_thread::sleep_for(retries_sleep); + } + + ++retry_num; + if (keeper_context->getServerState() == KeeperContext::Phase::INIT && retry_num == max_retries_on_init) + { + LOG_ERROR(logger, "Operation '{}' failed too many times", operation_description); + break; + } + } while (!keeper_context->isShutdownCalled()); + + LOG_ERROR( + logger, + "Failed to run '{}' while moving snapshot {} to disk {}", + operation_description, + path_from, + disk_to->getName()); + return false; + }; + + std::array, std::string_view>, 4> operations{ + std::pair{ + [&] + { + auto buf = disk_to->writeFile(tmp_snapshot_name); + buf->finalize(); + }, + "creating temporary file"}, + std::pair{[&] { disk_from->copyFile(from_path, *disk_to, path_to, {}); }, "copying file"}, + std::pair{[&] { disk_to->removeFileIfExists(tmp_snapshot_name); }, "removing temporary file"}, + std::pair{[&] { disk_from->removeFileIfExists(path_from); }, "removing snapshot file from source disk"}, + }; + + for (const auto & [op, operation_description] : operations) + if (!run_with_retries(op, operation_description)) + return; } uint64_t getSnapshotPathUpToLogIdx(const String & snapshot_path) @@ -774,7 +831,7 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() { if (file_info.disk != latest_snapshot_disk) { - moveFileBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path); + moveFileBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context); file_info.disk = latest_snapshot_disk; } } @@ -782,7 +839,7 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() { if (file_info.disk != disk) { - moveFileBetweenDisks(file_info.disk, file_info.path, disk, file_info.path); + moveFileBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context); file_info.disk = disk; } } diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index c82f8301eff..f83a49833a7 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -42,22 +42,20 @@ namespace ErrorCodes KeeperStateMachine::KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, CommitCallback commit_callback_, const std::string & superdigest_) : commit_callback(commit_callback_) - , coordination_settings(coordination_settings_) , snapshot_manager( - coordination_settings->snapshots_to_keep, + 
keeper_context_->getCoordinationSettings()->snapshots_to_keep, keeper_context_, - coordination_settings->compress_snapshots_with_zstd_format, + keeper_context_->getCoordinationSettings()->compress_snapshots_with_zstd_format, superdigest_, - coordination_settings->dead_session_check_period_ms.totalMilliseconds()) + keeper_context_->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds()) , responses_queue(responses_queue_) , snapshots_queue(snapshots_queue_) - , min_request_size_to_cache(coordination_settings_->min_request_size_for_cache) + , min_request_size_to_cache(keeper_context_->getCoordinationSettings()->min_request_size_for_cache) , last_committed_idx(0) , log(getLogger("KeeperStateMachine")) , superdigest(superdigest_) @@ -129,7 +127,7 @@ void KeeperStateMachine::init() if (!storage) storage = std::make_unique( - coordination_settings->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context); + keeper_context->getCoordinationSettings()->dead_session_check_period_ms.totalMilliseconds(), superdigest, keeper_context); } namespace diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index b11cd53c00e..f0a565aed8a 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -25,7 +25,6 @@ public: KeeperStateMachine( ResponsesQueue & responses_queue_, SnapshotsQueue & snapshots_queue_, - const CoordinationSettingsPtr & coordination_settings_, const KeeperContextPtr & keeper_context_, KeeperSnapshotManagerS3 * snapshot_manager_s3_, CommitCallback commit_callback_ = {}, @@ -139,8 +138,6 @@ private: SnapshotFileInfo latest_snapshot_info; nuraft::ptr latest_snapshot_buf = nullptr; - CoordinationSettingsPtr coordination_settings; - /// Main state machine logic KeeperStoragePtr storage TSA_PT_GUARDED_BY(storage_and_responses_lock); diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 4fbb9b52e6e..3f9c7aa3e44 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -241,23 +241,20 @@ KeeperStateManager::KeeperStateManager( const std::string & config_prefix_, const std::string & server_state_file_name_, const Poco::Util::AbstractConfiguration & config, - const CoordinationSettingsPtr & coordination_settings, KeeperContextPtr keeper_context_) : my_server_id(my_server_id_) , secure(config.getBool(config_prefix_ + ".raft_configuration.secure", false)) , config_prefix(config_prefix_) - , configuration_wrapper(parseServersConfiguration(config, false, coordination_settings->async_replication)) + , configuration_wrapper(parseServersConfiguration(config, false, keeper_context_->getCoordinationSettings()->async_replication)) , log_store(nuraft::cs_new( - LogFileSettings - { - .force_sync = coordination_settings->force_sync, - .compress_logs = coordination_settings->compress_logs, - .rotate_interval = coordination_settings->rotate_log_storage_interval, - .max_size = coordination_settings->max_log_file_size, - .overallocate_size = coordination_settings->log_file_overallocate_size}, - FlushSettings - { - .max_flush_batch_size = coordination_settings->max_flush_batch_size, + LogFileSettings{ + .force_sync = keeper_context_->getCoordinationSettings()->force_sync, + .compress_logs = keeper_context_->getCoordinationSettings()->compress_logs, + .rotate_interval = keeper_context_->getCoordinationSettings()->rotate_log_storage_interval, + .max_size = 
keeper_context_->getCoordinationSettings()->max_log_file_size, + .overallocate_size = keeper_context_->getCoordinationSettings()->log_file_overallocate_size}, + FlushSettings{ + .max_flush_batch_size = keeper_context_->getCoordinationSettings()->max_flush_batch_size, }, keeper_context_)) , server_state_file_name(server_state_file_name_) diff --git a/src/Coordination/KeeperStateManager.h b/src/Coordination/KeeperStateManager.h index 02dd6b2ff53..60f6dbe7b62 100644 --- a/src/Coordination/KeeperStateManager.h +++ b/src/Coordination/KeeperStateManager.h @@ -23,7 +23,6 @@ public: const std::string & config_prefix_, const std::string & server_state_file_name_, const Poco::Util::AbstractConfiguration & config, - const CoordinationSettingsPtr & coordination_settings, KeeperContextPtr keeper_context_); /// Constructor for tests diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index bd9dc4c3fd3..c6d98e6f3dd 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -65,7 +65,7 @@ struct CompressionParam class CoordinationTest : public ::testing::TestWithParam { protected: - DB::KeeperContextPtr keeper_context = std::make_shared(true); + DB::KeeperContextPtr keeper_context = std::make_shared(true, std::make_shared()); LoggerPtr log{getLogger("CoordinationTest")}; void SetUp() override @@ -1758,7 +1758,6 @@ getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const Co } void testLogAndStateMachine( - Coordination::CoordinationSettingsPtr settings, uint64_t total_logs, bool enable_compression, Coordination::KeeperContextPtr keeper_context) @@ -1766,6 +1765,7 @@ void testLogAndStateMachine( using namespace Coordination; using namespace DB; + const auto & settings = keeper_context->getCoordinationSettings(); ChangelogDirTest snapshots("./snapshots"); keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); @@ -1773,7 +1773,7 @@ void testLogAndStateMachine( ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); DB::KeeperLogStore changelog( DB::LogFileSettings{ @@ -1816,7 +1816,7 @@ void testLogAndStateMachine( } SnapshotsQueue snapshots_queue1{1}; - auto restore_machine = std::make_shared(queue, snapshots_queue1, settings, keeper_context, nullptr); + auto restore_machine = std::make_shared(queue, snapshots_queue1, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ -1863,63 +1863,72 @@ TEST_P(CoordinationTest, TestStateMachineAndLogStore) settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 37, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(37, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 11, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, 
settings); + testLogAndStateMachine(11, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(40, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 20; settings->rotate_log_storage_interval = 30; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(40, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 0; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(40, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 1; settings->reserved_log_items = 1; settings->rotate_log_storage_interval = 32; - testLogAndStateMachine(settings, 32, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(32, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 7; settings->rotate_log_storage_interval = 1; - testLogAndStateMachine(settings, 33, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(33, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 33, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(33, params.enable_compression, local_keeper_context); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 45, params.enable_compression, keeper_context); + auto local_keeper_context = std::make_shared(true, settings); + testLogAndStateMachine(45, params.enable_compression, local_keeper_context); } } @@ -1931,11 +1940,10 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove) ChangelogDirTest snapshots("./snapshots"); setSnapshotDirectory("./snapshots"); - CoordinationSettingsPtr settings = std::make_shared(); - ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); std::shared_ptr request_c = std::make_shared(); @@ -1965,11 +1973,10 @@ TEST_P(CoordinationTest, TestCreateNodeWithAuthSchemeForAclWhenAuthIsPrecommitte 
ChangelogDirTest snapshots("./snapshots"); setSnapshotDirectory("./snapshots"); - CoordinationSettingsPtr settings = std::make_shared(); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; @@ -2017,11 +2024,10 @@ TEST_P(CoordinationTest, TestSetACLWithAuthSchemeForAclWhenAuthIsPrecommitted) ChangelogDirTest snapshots("./snapshots"); setSnapshotDirectory("./snapshots"); - CoordinationSettingsPtr settings = std::make_shared(); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); String user_auth_data = "test_user:test_password"; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 351a4ab90bc..21626665a42 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -63,11 +63,11 @@ int main(int argc, char *argv[]) ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); - KeeperContextPtr keeper_context = std::make_shared(true); + KeeperContextPtr keeper_context = std::make_shared(true, settings); keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", argv[1])); - auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); + auto state_machine = std::make_shared(queue, snapshots_queue, keeper_context, nullptr); state_machine->init(); size_t last_commited_index = state_machine->last_commit_index(); From c1897e74a83101130ffa369dd6c58993c17f5e6f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 14 Feb 2024 14:04:25 +0000 Subject: [PATCH 049/145] fix --- src/Functions/FunctionBinaryArithmetic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 967c2b73881..9b4249b0aef 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -1708,9 +1708,9 @@ public: if (left.getScale() + right.getScale() > ResultDataType::maxPrecision()) throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Overflow during decimal division"); } - ResultDataType result_type = decimalResultType(left, right); - type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } + ResultDataType result_type = decimalResultType(left, right); + type_res = std::make_shared(result_type.getPrecision(), result_type.getScale()); } else if constexpr (((IsDataTypeDecimal && IsFloatingPoint) || (IsDataTypeDecimal && IsFloatingPoint))) From 8fe9ede2078b4e52c40490622580a72b7839144f Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Feb 2024 15:09:41 +0100 Subject: [PATCH 050/145] Add a test --- src/Storages/RabbitMQ/RabbitMQSource.cpp | 12 +-- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- .../configs/mergetree.xml | 5 ++ .../integration/test_storage_rabbitmq/test.py | 75 ++++++++++++++++++- 4 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 
tests/integration/test_storage_rabbitmq/configs/mergetree.xml diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 6c50d440373..25e4b120f42 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -120,12 +120,6 @@ Chunk RabbitMQSource::generateImpl() { auto timeout = std::chrono::milliseconds(context->getSettingsRef().rabbitmq_max_wait_ms.totalMilliseconds()); consumer = storage.popConsumer(timeout); - - if (consumer->needChannelUpdate()) - { - LOG_TRACE(log, "Channel {} is in error state, will update", consumer->getChannelID()); - consumer->updateChannel(storage.getConnection()); - } } if (is_finished || !consumer || consumer->isConsumerStopped()) @@ -135,6 +129,12 @@ Chunk RabbitMQSource::generateImpl() return {}; } + if (consumer->needChannelUpdate()) + { + LOG_TRACE(log, "Channel {} is in error state, will update", consumer->getChannelID()); + consumer->updateChannel(storage.getConnection()); + } + /// Currently it is one time usage source: to make sure data is flushed /// strictly by timeout or by block size. is_finished = true; diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 880602bf272..ec2048cca70 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1084,7 +1084,7 @@ bool StorageRabbitMQ::tryStreamToViews() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + LOG_ERROR(log, "Failed to push to views. Error: {}", getCurrentExceptionMessage(true)); write_failed = true; } diff --git a/tests/integration/test_storage_rabbitmq/configs/mergetree.xml b/tests/integration/test_storage_rabbitmq/configs/mergetree.xml new file mode 100644 index 00000000000..61eba8face7 --- /dev/null +++ b/tests/integration/test_storage_rabbitmq/configs/mergetree.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index d129543d68f..a6aee67e806 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -37,6 +37,18 @@ instance2 = cluster.add_instance( with_rabbitmq=True, ) +instance3 = cluster.add_instance( + "instance3", + user_configs=["configs/users.xml"], + main_configs=[ + "configs/rabbitmq.xml", + "configs/macros.xml", + "configs/named_collection.xml", + "configs/mergetree.xml", + ], + with_rabbitmq=True, +) + # Helpers @@ -84,6 +96,7 @@ def rabbitmq_cluster(): cluster.start() logging.debug("rabbitmq_id is {}".format(instance.cluster.rabbitmq_docker_id)) instance.query("CREATE DATABASE test") + instance3.query("CREATE DATABASE test") yield cluster @@ -3551,4 +3564,64 @@ def test_attach_broken_table(rabbitmq_cluster): assert "CANNOT_CONNECT_RABBITMQ" in error -# TODO: add a test +def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): + table_name = "nack_failed_insert" + exchange = f"{table_name}_exchange" + instance3.query( + f""" + CREATE TABLE test.{table_name} (key UInt64, value UInt64) + ENGINE = RabbitMQ + SETTINGS rabbitmq_host_port = '{rabbitmq_cluster.rabbitmq_host}:5672', + rabbitmq_flush_interval_ms=1000, + rabbitmq_exchange_name = '{exchange}', + rabbitmq_format = 'JSONEachRow'; + + DROP TABLE IF EXISTS test.view; + CREATE TABLE test.view (key UInt64, value UInt64) + ENGINE = MergeTree() + ORDER BY key; + + DROP TABLE IF EXISTS test.consumer; + CREATE MATERIALIZED VIEW test.consumer TO test.view AS + SELECT * FROM 
test.{table_name}; + """ + ) + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + num_rows = 25 + for i in range(num_rows): + message = json.dumps({"key": i, "value": i}) + "\n" + channel.basic_publish(exchange=exchange, routing_key="", body=message) + + connection.close() + + instance3.wait_for_log_line("Failed to push to views. Error: Code: 252. DB::Exception: Too many parts") + + instance3.replace_in_config( + "/etc/clickhouse-server/config.d/mergetree.xml", + "parts_to_throw_insert>1", + "parts_to_throw_insert>10", + ) + attempt = 0 + count = 0 + while attempt < 100: + count = int(instance3.query("SELECT count() FROM test.view")) + if count == num_rows: + break + attempt += 1 + + assert count == num_rows + + instance3.query( + f""" + DROP TABLE test.consumer; + DROP TABLE test.view; + DROP TABLE test.{table_name}; + """ + ) From 5f570b574e45162a8e050bc93fbb660e1a934286 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 14 Feb 2024 14:35:59 +0000 Subject: [PATCH 051/145] Automatic style fix --- tests/integration/test_storage_rabbitmq/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index a6aee67e806..5298d5d8ce2 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -3601,7 +3601,9 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): connection.close() - instance3.wait_for_log_line("Failed to push to views. Error: Code: 252. DB::Exception: Too many parts") + instance3.wait_for_log_line( + "Failed to push to views. Error: Code: 252. 
DB::Exception: Too many parts" + ) instance3.replace_in_config( "/etc/clickhouse-server/config.d/mergetree.xml", From eeaa9fb1bac01d394be483cc555293c6c0d952ab Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 15 Feb 2024 11:38:13 +0000 Subject: [PATCH 052/145] Update tests --- ...mn_must_not_override_past_values.reference | 38 +++++++++----- ...e_column_must_not_override_past_values.sql | 50 +++++++++++++++---- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference index a5a0370620b..461075e9607 100644 --- a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.reference @@ -1,33 +1,45 @@ +DEFAULT expressions -- Compact parts -Origin -1 2 +Before materialize +1 1 2 54321 After materialize -1 2 +1 1 2 54321 -- Wide parts -Origin -1 2 +Before materialize +1 1 2 54321 After materialize -1 2 +1 1 2 54321 -- Nullable column != physically absent -Origin -1 2 +Before materialize +1 1 2 \N 3 54321 After materialize -1 2 +1 1 2 \N 3 54321 -- Parts with renamed column -Origin -1 2 +Before materialize +1 1 2 54321 After rename -1 2 +1 1 2 54321 After materialize -1 2 +1 1 2 54321 +MATERIALIZED expressions +-- Compact parts +Before materialize +1 54321 +After materialize +1 65432 +-- Compact parts +Before materialize +1 54321 +After materialize +1 65432 diff --git a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql index 825c7eab048..cfdde287712 100644 --- a/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql +++ b/tests/queries/0_stateless/02946_materialize_column_must_not_override_past_values.sql @@ -1,11 +1,16 @@ SET mutations_sync = 2; +DROP TABLE IF EXISTS tab; + +-- Tests that existing parts which contain a non-default value in columns with DEFAULT expression remain unchanged by MATERIALIZE COLUMN> +SELECT 'DEFAULT expressions'; + SELECT '-- Compact parts'; CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (1, 1); INSERT INTO tab (id) VALUES (2); -SELECT 'Origin'; +SELECT 'Before materialize'; SELECT * FROM tab ORDER BY id; ALTER TABLE tab MATERIALIZE COLUMN dflt; SELECT 'After materialize'; @@ -15,9 +20,9 @@ DROP TABLE tab; SELECT '-- Wide parts'; CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (1, 1); INSERT INTO tab (id) VALUES (2); -SELECT 'Origin'; +SELECT 'Before materialize'; SELECT * FROM tab ORDER BY id; ALTER TABLE tab MATERIALIZE COLUMN dflt; SELECT 'After materialize'; @@ -27,10 +32,10 @@ DROP TABLE tab; SELECT '-- Nullable column != physically absent'; CREATE TABLE tab (id Int64, dflt Nullable(Int64) DEFAULT 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; -INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (1, 1); INSERT INTO tab (id, dflt) VALUES (2, NULL); INSERT INTO tab (id) VALUES (3); -SELECT 'Origin'; +SELECT 'Before materialize'; SELECT * FROM tab ORDER BY id; ALTER TABLE tab MATERIALIZE COLUMN 
dflt; SELECT 'After materialize'; @@ -40,14 +45,41 @@ DROP TABLE tab; SELECT '-- Parts with renamed column'; CREATE TABLE tab (id Int64, dflt Int64 DEFAULT 54321) ENGINE MergeTree ORDER BY id; -INSERT INTO tab (id, dflt) VALUES (1, 2); +INSERT INTO tab (id, dflt) VALUES (1, 1); INSERT INTO tab (id) VALUES (2); -SELECT 'Origin'; +SELECT 'Before materialize'; SELECT * FROM tab ORDER BY id; ALTER TABLE tab RENAME COLUMN dflt TO dflt2; SELECT 'After rename'; SELECT * FROM tab ORDER BY id; -ALTER TABLE tab MATERIALIZE COLUMN bar; +ALTER TABLE tab MATERIALIZE COLUMN dflt2; SELECT 'After materialize'; SELECT * FROM tab ORDER BY id; DROP TABLE tab; + +-- But for columns with MATERIALIZED expression, all existing parts should be rewritten in case a new expression was set in the meantime. +SELECT 'MATERIALIZED expressions'; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id; +INSERT INTO tab (id) VALUES (1); +SELECT 'Before materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432; +ALTER TABLE tab MATERIALIZE COLUMN mtrl; +SELECT 'After materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +DROP TABLE tab; + +SELECT '-- Compact parts'; + +CREATE TABLE tab (id Int64, mtrl Int64 MATERIALIZED 54321) ENGINE MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 1; +INSERT INTO tab (id) VALUES (1); +SELECT 'Before materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +ALTER TABLE tab MODIFY COLUMN mtrl Int64 MATERIALIZED 65432; +ALTER TABLE tab MATERIALIZE COLUMN mtrl; +SELECT 'After materialize'; +SELECT id, mtrl FROM tab ORDER BY id; +DROP TABLE tab; From efa823400b8ff6c8d7eb95f232c20fea1fcfb229 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 15 Feb 2024 13:19:02 +0000 Subject: [PATCH 053/145] Add IDataType::forEachChild and use it in nested types validation --- src/DataTypes/DataTypeArray.cpp | 5 + src/DataTypes/DataTypeArray.h | 1 + src/DataTypes/DataTypeLowCardinality.cpp | 6 + src/DataTypes/DataTypeLowCardinality.h | 2 + src/DataTypes/DataTypeMap.cpp | 8 ++ src/DataTypes/DataTypeMap.h | 2 + src/DataTypes/DataTypeNullable.cpp | 6 + src/DataTypes/DataTypeNullable.h | 3 + src/DataTypes/DataTypeTuple.cpp | 9 ++ src/DataTypes/DataTypeTuple.h | 2 + src/DataTypes/DataTypeVariant.cpp | 9 ++ src/DataTypes/DataTypeVariant.h | 2 + src/DataTypes/IDataType.h | 4 + .../parseColumnsListForTableFunction.cpp | 115 +++++++----------- 14 files changed, 104 insertions(+), 70 deletions(-) diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index 24cd759e2a5..6e5760933eb 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -69,6 +69,11 @@ String DataTypeArray::doGetPrettyName(size_t indent) const return s.str(); } +void DataTypeArray::forEachChild(const ChildCallback & callback) const +{ + callback(*nested); + nested->forEachChild(callback); +} static DataTypePtr create(const ASTPtr & arguments) { diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 6a09b3b530d..4423f137e1a 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -43,6 +43,7 @@ public: MutableColumnPtr createColumn() const override; + void forEachChild(const ChildCallback & callback) const override; Field getDefault() const override; diff --git a/src/DataTypes/DataTypeLowCardinality.cpp b/src/DataTypes/DataTypeLowCardinality.cpp index 3e94b533c7a..5af1f28cbad 100644 --- a/src/DataTypes/DataTypeLowCardinality.cpp +++ 
b/src/DataTypes/DataTypeLowCardinality.cpp @@ -153,6 +153,12 @@ SerializationPtr DataTypeLowCardinality::doGetDefaultSerialization() const return std::make_shared(dictionary_type); } +void DataTypeLowCardinality::forEachChild(const ChildCallback & callback) const +{ + callback(*dictionary_type); + dictionary_type->forEachChild(callback); +} + static DataTypePtr create(const ASTPtr & arguments) { diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 389e24ef2a9..cd926bb595c 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -60,6 +60,8 @@ public: static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type); static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys); + void forEachChild(const ChildCallback & callback) const override; + private: SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 1f246af74d3..4b85606ff26 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -143,6 +143,14 @@ DataTypePtr DataTypeMap::getNestedTypeWithUnnamedTuple() const return std::make_shared(std::make_shared(from_tuple.getElements())); } +void DataTypeMap::forEachChild(const DB::IDataType::ChildCallback & callback) const +{ + callback(*key_type); + key_type->forEachChild(callback); + callback(*value_type); + value_type->forEachChild(callback); +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.size() != 2) diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 257888a8e44..7281cca1bb1 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -54,6 +54,8 @@ public: static bool checkKeyType(DataTypePtr key_type); + void forEachChild(const ChildCallback & callback) const override; + private: void assertKeyType() const; }; diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 484d779551f..16d5d41e5e5 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -61,6 +61,12 @@ SerializationPtr DataTypeNullable::doGetDefaultSerialization() const return std::make_shared(nested_data_type->getDefaultSerialization()); } +void DataTypeNullable::forEachChild(const ChildCallback & callback) const +{ + callback(*nested_data_type); + nested_data_type->forEachChild(callback); +} + static DataTypePtr create(const ASTPtr & arguments) { diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 7ad0e1ba5f1..b102c767993 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -43,6 +43,9 @@ public: bool canBePromoted() const override { return nested_data_type->canBePromoted(); } const DataTypePtr & getNestedType() const { return nested_data_type; } + + void forEachChild(const ChildCallback & callback) const override; + private: SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 5c9d5a3366e..26a871182a7 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -376,6 +376,15 @@ SerializationInfoPtr DataTypeTuple::getSerializationInfo(const IColumn & column) return std::make_shared(std::move(infos), names, SerializationInfo::Settings{}); } +void DataTypeTuple::forEachChild(const ChildCallback & callback) const +{ + for (const 
auto & elem : elems) + { + callback(*elem); + elem->forEachChild(callback); + } +} + static DataTypePtr create(const ASTPtr & arguments) { diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index db49b7f22d1..4e5a0c1b33c 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -70,6 +70,8 @@ public: String getNameByPosition(size_t i) const; bool haveExplicitNames() const { return have_explicit_names; } + + void forEachChild(const ChildCallback & callback) const override; }; } diff --git a/src/DataTypes/DataTypeVariant.cpp b/src/DataTypes/DataTypeVariant.cpp index 456b4ea03b6..0543507a14d 100644 --- a/src/DataTypes/DataTypeVariant.cpp +++ b/src/DataTypes/DataTypeVariant.cpp @@ -175,6 +175,15 @@ SerializationPtr DataTypeVariant::doGetDefaultSerialization() const return std::make_shared(std::move(serializations), std::move(variant_names), SerializationVariant::getVariantsDeserializeTextOrder(variants), getName()); } +void DataTypeVariant::forEachChild(const DB::IDataType::ChildCallback & callback) const +{ + for (const auto & variant : variants) + { + callback(*variant); + variant->forEachChild(callback); + } +} + static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) diff --git a/src/DataTypes/DataTypeVariant.h b/src/DataTypes/DataTypeVariant.h index d26ce4ea90f..2a2206f985a 100644 --- a/src/DataTypes/DataTypeVariant.h +++ b/src/DataTypes/DataTypeVariant.h @@ -54,6 +54,8 @@ public: /// Check if Variant has provided type in the list of variants and return its discriminator. std::optional tryGetVariantDiscriminator(const DataTypePtr & type) const; + void forEachChild(const ChildCallback & callback) const override; + private: std::string doGetName() const override; std::string doGetPrettyName(size_t indent) const override; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 48cc127746f..220658afda5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -111,6 +111,10 @@ public: const SubcolumnCallback & callback, const SubstreamData & data); + /// Call callback for each nested type recursively. + using ChildCallback = std::function; + virtual void forEachChild(const ChildCallback &) const {} + Names getSubcolumnNames() const; virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const; diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 056674c4379..1499568cec9 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -7,11 +7,6 @@ #include #include #include -#include -#include -#include -#include - namespace DB { @@ -24,84 +19,64 @@ namespace ErrorCodes } -void validateDataType(const DataTypePtr & type, const DataTypeValidationSettings & settings) +void validateDataType(const DataTypePtr & type_to_check, const DataTypeValidationSettings & settings) { - if (!settings.allow_suspicious_low_cardinality_types) + auto validate_callback = [&](const IDataType & data_type) { - if (const auto * lc_type = typeid_cast(type.get())) + if (!settings.allow_suspicious_low_cardinality_types) { - if (!isStringOrFixedString(*removeNullable(lc_type->getDictionaryType()))) - throw Exception( - ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, - "Creating columns of type {} is prohibited by default due to expected negative impact on performance. 
" - "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - lc_type->getName()); + if (const auto * lc_type = typeid_cast(&data_type)) + { + if (!isStringOrFixedString(*removeNullable(lc_type->getDictionaryType()))) + throw Exception( + ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, + "Creating columns of type {} is prohibited by default due to expected negative impact on performance. " + "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", + lc_type->getName()); + } } - } - if (!settings.allow_experimental_object_type) - { - if (type->hasDynamicSubcolumns()) + if (!settings.allow_experimental_object_type) { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Object type is not allowed. " - "Set setting allow_experimental_object_type = 1 in order to allow it", type->getName()); - } - } - - if (!settings.allow_suspicious_fixed_string_types) - { - if (const auto * fixed_string = typeid_cast(type.get())) - { - if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) + if (data_type.hasDynamicSubcolumns()) + { throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because fixed string with size > {} is suspicious. " - "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", - type->getName(), - MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); + "Cannot create column with type '{}' because experimental Object type is not allowed. " + "Set setting allow_experimental_object_type = 1 in order to allow it", + data_type.getName()); + } } - } - if (!settings.allow_experimental_variant_type) - { - if (isVariant(type)) + if (!settings.allow_suspicious_fixed_string_types) { - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, - "Cannot create column with type '{}' because experimental Variant type is not allowed. " - "Set setting allow_experimental_variant_type = 1 in order to allow it", type->getName()); + if (const auto * fixed_string = typeid_cast(&data_type)) + { + if (fixed_string->getN() > MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because fixed string with size > {} is suspicious. " + "Set setting allow_suspicious_fixed_string_types = 1 in order to allow it", + data_type.getName(), + MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS); + } } - } - if (const auto * nullable_type = typeid_cast(type.get())) - { - validateDataType(nullable_type->getNestedType(), settings); - } - else if (const auto * lc_type = typeid_cast(type.get())) - { - validateDataType(lc_type->getDictionaryType(), settings); - } - else if (const auto * array_type = typeid_cast(type.get())) - { - validateDataType(array_type->getNestedType(), settings); - } - else if (const auto * tuple_type = typeid_cast(type.get())) - { - for (const auto & element : tuple_type->getElements()) - validateDataType(element, settings); - } - else if (const auto * map_type = typeid_cast(type.get())) - { - validateDataType(map_type->getKeyType(), settings); - validateDataType(map_type->getValueType(), settings); - } - else if (const auto * variant_type = typeid_cast(type.get())) - { - for (const auto & variant : variant_type->getVariants()) - validateDataType(variant, settings); - } + if (!settings.allow_experimental_variant_type) + { + if (isVariant(data_type)) + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot create column with type '{}' because experimental Variant type is not allowed. 
" + "Set setting allow_experimental_variant_type = 1 in order to allow it", + data_type.getName()); + } + } + }; + + validate_callback(*type_to_check); + type_to_check->forEachChild(validate_callback); } ColumnsDescription parseColumnsListFromString(const std::string & structure, const ContextPtr & context) From daf7505e5e0893f15f7fa0e9eccc1adcd22f48bc Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 15 Feb 2024 14:12:32 +0100 Subject: [PATCH 054/145] Improve --- src/Coordination/Changelog.cpp | 230 ++++++++++++------ src/Coordination/Changelog.h | 26 +- src/Coordination/KeeperLogStore.cpp | 5 +- src/Coordination/tests/gtest_coordination.cpp | 33 ++- 4 files changed, 200 insertions(+), 94 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 07e9a3faeee..63bfb709125 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -69,10 +69,7 @@ void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr descrip /// a different thread could be trying to read from the file /// we should make sure the source disk contains the file while read is in progress - { - std::lock_guard file_lock(description->file_mutex); - description->disk = disk_to; - } + description->withLock([&]{ description->disk = disk_to; }); disk_from->removeFile(description->path); description->path = path_to; } @@ -723,29 +720,33 @@ void LogEntryStorage::prefetchCommitLogs() for (const auto & prefetch_file_info : prefetch_info->file_infos) { const auto & [changelog_description, position, count] = prefetch_file_info; - std::lock_guard file_lock(changelog_description->file_mutex); - auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); - file->seek(position, SEEK_SET); - LOG_TRACE(log, "Prefetching {} log entries from path {}, from position {}", count, changelog_description->path, position); - ProfileEvents::increment(ProfileEvents::KeeperLogsPrefetchedEntries, count); + changelog_description->withLock( + [&] + { + auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); + file->seek(position, SEEK_SET); + LOG_TRACE( + log, "Prefetching {} log entries from path {}, from position {}", count, changelog_description->path, position); + ProfileEvents::increment(ProfileEvents::KeeperLogsPrefetchedEntries, count); - for (size_t i = 0; i < count; ++i) - { - if (prefetch_info->cancel) - break; + for (size_t i = 0; i < count; ++i) + { + if (prefetch_info->cancel) + break; - auto record = readChangelogRecord(*file, changelog_description->path); - auto entry = logEntryFromRecord(record); - if (current_index != record.header.index) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Invalid index prefetched, expected {}, actual {}", - current_index, - record.header.index); + auto record = readChangelogRecord(*file, changelog_description->path); + auto entry = logEntryFromRecord(record); + if (current_index != record.header.index) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Invalid index prefetched, expected {}, actual {}", + current_index, + record.header.index); - commit_logs_cache.getPrefetchedCacheEntry(record.header.index).resolve(std::move(entry)); - ++current_index; - } + commit_logs_cache.getPrefetchedCacheEntry(record.header.index).resolve(std::move(entry)); + ++current_index; + } + }); if (prefetch_info->cancel) break; @@ -770,6 +771,7 @@ void LogEntryStorage::startCommitLogsPrefetch(uint64_t last_committed_index) con if (keeper_context->isShutdownCalled()) 
return; + /// commit logs is not empty and it's not next log if (!commit_logs_cache.empty() && commit_logs_cache.max_index_in_cache != last_committed_index) return; @@ -782,14 +784,24 @@ void LogEntryStorage::startCommitLogsPrefetch(uint64_t last_committed_index) con auto new_prefetch_info = std::make_shared(); auto & [prefetch_from, prefetch_to] = new_prefetch_info->commit_prefetch_index_range; + /// if there are no entries in commit cache we will start from the next log that will be committed /// otherwise we continue appending the commit cache from the latest entry stored in it - size_t current_index = commit_logs_cache.cache.empty() ? last_committed_index + 1 : commit_logs_cache.max_index_in_cache + 1; + size_t current_index = commit_logs_cache.empty() ? last_committed_index + 1 : commit_logs_cache.max_index_in_cache + 1; + prefetch_from = current_index; + size_t total_size = 0; std::vector file_infos; FileReadInfo * current_file_info = nullptr; - for (; latest_logs_cache.empty() || current_index < latest_logs_cache.min_index_in_cache; ++current_index) + + size_t max_index_for_prefetch = 0; + if (!latest_logs_cache.empty()) + max_index_for_prefetch = latest_logs_cache.min_index_in_cache - 1; + else + max_index_for_prefetch = max_index_with_location; + + for (; current_index <= max_index_for_prefetch; ++current_index) { const auto & [changelog_description, position, size] = logs_location.at(current_index); if (total_size == 0) @@ -841,9 +853,8 @@ void LogEntryStorage::InMemoryCache::addEntry(uint64_t index, size_t size, Cache { auto [_, inserted] = cache.emplace(index, std::move(log_entry)); if (!inserted) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to insert log with index {} which is already present in cache", index); - } + updateStatsWithNewEntry(index, size); } @@ -991,6 +1002,8 @@ void LogEntryStorage::addEntry(uint64_t index, const LogEntryPtr & log_entry) latest_config_index = index; logs_with_config_changes.insert(index); } + + updateTermInfoWithNewEntry(index, log_entry->get_term()); } bool LogEntryStorage::shouldMoveLogToCommitCache(uint64_t index, size_t log_entry_size) @@ -1002,6 +1015,15 @@ bool LogEntryStorage::shouldMoveLogToCommitCache(uint64_t index, size_t log_entr return commit_logs_cache.max_index_in_cache == index - 1 && commit_logs_cache.hasSpaceAvailable(log_entry_size); } +void LogEntryStorage::updateTermInfoWithNewEntry(uint64_t index, uint64_t term) +{ + if (!log_term_infos.empty() && log_term_infos.back().term == term) + return; + + chassert(log_term_infos.empty() || log_term_infos.back().term == term - 1); + log_term_infos.push_back(LogTermInfo{.term = term, .first_index = index}); +} + void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location) { auto entry_size = logEntrySize(log_entry); @@ -1026,14 +1048,13 @@ void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & l latest_config_index = index; logs_with_config_changes.insert(index); } + + updateTermInfoWithNewEntry(index, log_entry->get_term()); } void LogEntryStorage::cleanUpTo(uint64_t index) { latest_logs_cache.cleanUpTo(index); - /// uncommitted logs should never be compacted so we don't have to handle - /// logs that are currently being prefetched - commit_logs_cache.cleanUpTo(index); if (!logs_location.empty() && index > min_index_with_location) { @@ -1056,6 +1077,27 @@ void LogEntryStorage::cleanUpTo(uint64_t index) } } + + /// uncommitted logs should be compacted only if we received snapshot from leader + if 
(current_prefetch_info && !current_prefetch_info->done) + { + auto [prefetch_from, prefetch_to] = current_prefetch_info->commit_prefetch_index_range; + /// if we will clean some logs that are currently prefetched, stop prefetching + /// and clean all logs that were being prefetched + if (index > prefetch_from) + { + current_prefetch_info->cancel = true; + current_prefetch_info->done.wait(false); + commit_logs_cache.cleanUpTo(std::max(prefetch_to + 1, index)); + } + /// start prefetching logs for committing at the current index + /// the last log index in the snapshot should be the + /// last log we cleaned up + startCommitLogsPrefetch(index - 1); + } + else + commit_logs_cache.cleanUpTo(index); + std::erase_if(logs_with_config_changes, [&](const auto conf_index) { return conf_index < index; }); if (auto it = std::max_element(logs_with_config_changes.begin(), logs_with_config_changes.end()); it != logs_with_config_changes.end()) { @@ -1067,31 +1109,24 @@ void LogEntryStorage::cleanUpTo(uint64_t index) if (first_log_index < index) first_log_entry = nullptr; + + /// remove all the term infos we don't need (all terms that start before index) + uint64_t last_removed_term = 0; + while (!log_term_infos.empty() && log_term_infos.front().first_index < index) + { + last_removed_term = log_term_infos.front().term; + log_term_infos.pop_front(); + } + + /// the last removed term info could contain terms for some indices we didn't cleanup + /// so we add the last removed term info back but with new first index + if (last_removed_term != 0 && (log_term_infos.empty() || log_term_infos.front().first_index > index)) + log_term_infos.push_front(LogTermInfo{.term = last_removed_term, .first_index = index}); } void LogEntryStorage::cleanAfter(uint64_t index) { latest_logs_cache.cleanAfter(index); - /// if we cleared all latest logs, there is a possibility we would need to clear commit logs - if (latest_logs_cache.empty()) - { - /// we will clean everything after the index, if there is a prefetch in progress - /// wait until we fetch everything until index - /// afterwards we can stop prefetching of newer logs because they will be cleaned up - commit_logs_cache.getEntry(index); - if (current_prefetch_info && !current_prefetch_info->done) - { - auto [prefetch_from, prefetch_to] = current_prefetch_info->commit_prefetch_index_range; - if (index >= prefetch_from && index <= prefetch_to) - { - current_prefetch_info->cancel = true; - current_prefetch_info->done.wait(false); - } - } - - commit_logs_cache.cleanAfter(index); - startCommitLogsPrefetch(keeper_context->lastCommittedIndex()); - } if (!logs_location.empty() && index < max_index_with_location) { @@ -1114,9 +1149,33 @@ void LogEntryStorage::cleanAfter(uint64_t index) } } - if (empty()) - /// if we don't store any logs, reset first log cache + /// if we cleared all latest logs, there is a possibility we would need to clear commit logs + if (latest_logs_cache.empty()) + { + /// we will clean everything after the index, if there is a prefetch in progress + /// wait until we fetch everything until index + /// afterwards we can stop prefetching of newer logs because they will be cleaned up + commit_logs_cache.getEntry(index); + if (current_prefetch_info && !current_prefetch_info->done) + { + auto [prefetch_from, prefetch_to] = current_prefetch_info->commit_prefetch_index_range; + /// if we will clean some logs that are currently prefetched, stop prefetching + if (index < prefetch_to) + { + current_prefetch_info->cancel = true; + 
current_prefetch_info->done.wait(false); + } + } + + commit_logs_cache.cleanAfter(index); + startCommitLogsPrefetch(keeper_context->lastCommittedIndex()); + } + + if (empty() || first_log_index > index) + { + /// if we don't store any logs or if the first log index changed, reset first log cache first_log_entry = nullptr; + } std::erase_if(logs_with_config_changes, [&](const auto conf_index) { return conf_index > index; }); if (auto it = std::max_element(logs_with_config_changes.begin(), logs_with_config_changes.end()); it != logs_with_config_changes.end()) @@ -1127,8 +1186,9 @@ void LogEntryStorage::cleanAfter(uint64_t index) else latest_config = nullptr; - if (first_log_index > index) - first_log_entry = nullptr; + /// remove all the term infos we don't need (all terms that start after index) + while (!log_term_infos.empty() && log_term_infos.back().first_index > index) + log_term_infos.pop_back(); } bool LogEntryStorage::contains(uint64_t index) const @@ -1165,13 +1225,22 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const else if (auto it = logs_location.find(index); it != logs_location.end()) { const auto & [changelog_description, position, size] = it->second; - std::lock_guard file_lock(changelog_description->file_mutex); - auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); - file->seek(position, SEEK_SET); - LOG_TRACE(log, "Reading log entry at index {} from path {}, position {}, size {}", index, changelog_description->path, position, size); + changelog_description->withLock( + [&] + { + auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); + file->seek(position, SEEK_SET); + LOG_TRACE( + log, + "Reading log entry at index {} from path {}, position {}, size {}", + index, + changelog_description->path, + position, + size); - auto record = readChangelogRecord(*file, changelog_description->path); - entry = logEntryFromRecord(record); + auto record = readChangelogRecord(*file, changelog_description->path); + entry = logEntryFromRecord(record); + }); /// if we fetched the first log entry, we will cache it because it's often accessed if (first_log_entry == nullptr && index == getFirstIndex()) @@ -1197,10 +1266,18 @@ LogEntryPtr LogEntryStorage::getLatestConfigChange() const return latest_config; } -void LogEntryStorage::cacheFirstLog(uint64_t first_index) +uint64_t LogEntryStorage::termAt(uint64_t index) const { - first_log_entry = getEntry(first_index); - first_log_index = first_index; + uint64_t term_for_index = 0; + for (const auto [term, first_index] : log_term_infos) + { + if (index < first_index) + return term_for_index; + + term_for_index = term; + } + + return term_for_index; } void LogEntryStorage::addLogLocations(std::vector> && indices_with_log_locations) @@ -1268,16 +1345,19 @@ LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end const auto & [file_description, start_position, count] = *read_info; LOG_TRACE(log, "Reading from path {} {} entries", file_description->path, count); - std::lock_guard file_lock(file_description->file_mutex); - auto file = file_description->disk->readFile(file_description->path); - file->seek(start_position, SEEK_SET); + file_description->withLock( + [&] + { + auto file = file_description->disk->readFile(file_description->path); + file->seek(start_position, SEEK_SET); - for (size_t i = 0; i < count; ++i) - { - auto record = readChangelogRecord(*file, file_description->path); - ret->push_back(logEntryFromRecord(record)); - 
ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); - } + for (size_t i = 0; i < count; ++i) + { + auto record = readChangelogRecord(*file, file_description->path); + ret->push_back(logEntryFromRecord(record)); + ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); + } + }); read_info.reset(); }; @@ -2168,7 +2248,11 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer) bool Changelog::isConfigLog(uint64_t index) const { return entry_storage.isConfigLog(index); +} +uint64_t Changelog::termAt(uint64_t index) const +{ + return entry_storage.termAt(index); } bool Changelog::flush() diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index f560b908ea7..d18f6b84283 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -81,6 +81,13 @@ struct ChangelogFileDescription /// How many entries should be stored in this log uint64_t expectedEntriesCountInLog() const { return to_log_index - from_log_index + 1; } + + template + void withLock(TFunction && fn) + { + std::lock_guard lock(file_mutex); + fn(); + } }; using ChangelogFileDescriptionPtr = std::shared_ptr; @@ -168,8 +175,7 @@ struct LogEntryStorage LogEntryPtr getEntry(uint64_t index) const; void clear(); LogEntryPtr getLatestConfigChange() const; - - void cacheFirstLog(uint64_t first_index); + uint64_t termAt(uint64_t index) const; using IndexWithLogLocation = std::pair; @@ -195,6 +201,8 @@ private: bool shouldMoveLogToCommitCache(uint64_t index, size_t log_entry_size); + void updateTermInfoWithNewEntry(uint64_t index, uint64_t term); + struct InMemoryCache { explicit InMemoryCache(size_t size_threshold_); @@ -269,6 +277,17 @@ private: /// store indices of logs that contain config changes std::unordered_set logs_with_config_changes; + struct LogTermInfo + { + uint64_t term = 0; + uint64_t first_index = 0; + }; + + /// store first index of each term + /// so we don't have to fetch log to return that information + /// terms are monotonically increasing so first index is enough + std::deque log_term_infos; + bool is_shutdown = false; KeeperContextPtr keeper_context; LoggerPtr log; @@ -324,6 +343,7 @@ public: void applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer); bool isConfigLog(uint64_t index) const; + uint64_t termAt(uint64_t index) const; /// Fsync latest log to disk and flush buffer bool flush(); @@ -384,8 +404,6 @@ private: LogEntryStorage entry_storage; - std::unordered_set conf_logs_indices; - uint64_t max_log_id = 0; /// For compaction, queue of delete not used logs /// 128 is enough, even if log is not removed, it's not a problem diff --git a/src/Coordination/KeeperLogStore.cpp b/src/Coordination/KeeperLogStore.cpp index f4d850c94ee..820039d8a8f 100644 --- a/src/Coordination/KeeperLogStore.cpp +++ b/src/Coordination/KeeperLogStore.cpp @@ -75,10 +75,7 @@ bool KeeperLogStore::is_conf(uint64_t index) uint64_t KeeperLogStore::term_at(uint64_t index) { std::lock_guard lock(changelog_lock); - auto entry = changelog.entryAt(index); - if (entry) - return entry->get_term(); - return 0; + return changelog.termAt(index); } nuraft::ptr KeeperLogStore::pack(uint64_t index, int32_t cnt) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 3c1b8fbd359..61ff8c3f16a 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1762,19 +1762,25 @@ getLogEntryFromZKRequest(size_t term, int64_t session_id, int64_t zxid, const 
Co void testLogAndStateMachine( Coordination::CoordinationSettingsPtr settings, uint64_t total_logs, - bool enable_compression, - Coordination::KeeperContextPtr keeper_context) + bool enable_compression) { using namespace Coordination; using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + + auto get_keeper_context = [&] + { + auto local_keeper_context = std::make_shared(true); + local_keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); + local_keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + return local_keeper_context; + }; ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; + auto keeper_context = get_keeper_context(); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); DB::KeeperLogStore changelog( @@ -1821,6 +1827,7 @@ void testLogAndStateMachine( } SnapshotsQueue snapshots_queue1{1}; + keeper_context = get_keeper_context(); auto restore_machine = std::make_shared(queue, snapshots_queue1, settings, keeper_context, nullptr); restore_machine->init(); EXPECT_EQ(restore_machine->last_commit_index(), total_logs - total_logs % settings->snapshot_distance); @@ -1868,63 +1875,63 @@ TEST_P(CoordinationTest, TestStateMachineAndLogStore) settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 37, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 37, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 11, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 11, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 10; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 40, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 20; settings->rotate_log_storage_interval = 30; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 40, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 0; settings->rotate_log_storage_interval = 10; - testLogAndStateMachine(settings, 40, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 40, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 1; settings->reserved_log_items = 1; settings->rotate_log_storage_interval = 32; - testLogAndStateMachine(settings, 32, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 32, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 10; settings->reserved_log_items = 7; settings->rotate_log_storage_interval = 1; - 
testLogAndStateMachine(settings, 33, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 33, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 33, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 33, params.enable_compression); } { CoordinationSettingsPtr settings = std::make_shared(); settings->snapshot_distance = 37; settings->reserved_log_items = 1000; settings->rotate_log_storage_interval = 5000; - testLogAndStateMachine(settings, 45, params.enable_compression, keeper_context); + testLogAndStateMachine(settings, 45, params.enable_compression); } } From 1edf3b2254b1300308f3f8691de94f100e900098 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 15 Feb 2024 16:19:56 +0000 Subject: [PATCH 055/145] Replace ORDER BY ALL by ORDER BY * to get rid of ambiguities --- .../statements/select/order-by.md | 3 +- .../statements/select/order-by.md | 6 +- src/Analyzer/Passes/QueryAnalysisPass.cpp | 3 - src/Analyzer/QueryNode.h | 4 +- src/Interpreters/TreeRewriter.cpp | 19 +--- src/Parsers/ASTSelectQuery.cpp | 2 +- src/Parsers/ParserSelectQuery.cpp | 24 ++--- .../0_stateless/02567_and_consistency.sql | 10 +- .../02884_string_distance_function.sql | 14 +-- .../0_stateless/02943_order_by_all.reference | 34 +------ .../0_stateless/02943_order_by_all.sql | 96 ++++--------------- .../02962_join_using_bug_57894.sql | 12 +-- 12 files changed, 61 insertions(+), 166 deletions(-) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index bea5dcab461..29aca70762e 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -9,10 +9,9 @@ The `ORDER BY` clause contains - a list of expressions, e.g. `ORDER BY visits, search_phrase`, - a list of numbers referring to columns in the `SELECT` clause, e.g. `ORDER BY 2, 1`, or -- `ALL` (without other expressions or numbers) which means all columns of the `SELECT` clause: `ORDER BY ALL`. +- `*` (without other expressions or numbers) which means all columns of the `SELECT` clause: `ORDER BY *`. To disable sorting by column numbers, set setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments) = 0. -`ORDER BY ALL` cannot be used when the `SELECT` clause contains identifiers or aliases named `all` (case-insensitively). The `ORDER BY` clause can be attributed by a `DESC` (descending) or `ASC` (ascending) modifier which determines the sorting direction. Unless an explicit sort order is specified, `ASC` is used by default. 
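To make the new semantics concrete: `ORDER BY *` expands to every column of the `SELECT` clause, and `*` must appear stand-alone in the `ORDER BY` list. The following is a minimal sketch, reusing the table and column names already used in this patch's docs and tests (not an additional change introduced by the patch):

``` sql
-- ORDER BY * sorts by all columns of the SELECT clause, in SELECT order
SELECT a, b FROM t ORDER BY *;
-- expected to behave like the explicit form
SELECT a, b FROM t ORDER BY a, b;
-- mixing * with other expressions is rejected (see 02943_order_by_all.sql below)
-- SELECT a, b FROM t ORDER BY *, a;   -- error
```

Because `ALL` loses its special meaning in `ORDER BY`, columns, aliases, or expressions named `all` no longer conflict with the sort modifier, which is the ambiguity this patch removes.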
diff --git a/docs/zh/sql-reference/statements/select/order-by.md b/docs/zh/sql-reference/statements/select/order-by.md index 3286fc9f9e7..9540c96a10d 100644 --- a/docs/zh/sql-reference/statements/select/order-by.md +++ b/docs/zh/sql-reference/statements/select/order-by.md @@ -61,14 +61,14 @@ sidebar_label: ORDER BY 我们只建议使用 `COLLATE` 对于少量行的最终排序,因为排序与 `COLLATE` 比正常的按字节排序效率低。 -## ORDER BY ALL +## ORDER BY * -`ORDER BY ALL` 对所有选定的列进行升序排序。 +`ORDER BY *` 对所有选定的列进行升序排序。 示例: ``` sql -SELECT a, b, c FROM t ORDER BY ALL +SELECT a, b, c FROM t ORDER BY * ``` 等同于: diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index a2c719606d8..f93f7cf2a25 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2357,9 +2357,6 @@ void QueryAnalyzer::expandOrderByAll(QueryNode & query_tree_node_typed) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expression nodes list expected 1 projection names. Actual {}", projection_names.size()); - if (Poco::toUpper(projection_names[0]) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, - "Cannot use ORDER BY ALL to sort a column with name 'all', please disable setting `enable_order_by_all` and try again"); } auto sort_node = std::make_shared(node, all_node->getSortDirection(), all_node->getNullsSortDirection()); diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index d8b8741afb2..1b389572e42 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -219,13 +219,13 @@ public: is_group_by_all = is_group_by_all_value; } - /// Returns true, if query node has ORDER BY ALL modifier, false otherwise + /// Returns true, if query node has ORDER BY * modifier, false otherwise bool isOrderByAll() const { return is_order_by_all; } - /// Set query node ORDER BY ALL modifier value + /// Set query node ORDER BY * modifier value void setIsOrderByAll(bool is_order_by_all_value) { is_order_by_all = is_order_by_all_value; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 0a260969cd4..14fbc9ebebb 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -787,23 +787,6 @@ void expandOrderByAll(ASTSelectQuery * select_query) for (const auto & expr : select_query->select()->children) { - if (auto * identifier = expr->as(); identifier != nullptr) - { - if (identifier->alias.empty()) - { - if (Poco::toUpper(identifier->name()) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column with name 'all'"); - } - else - { - if (Poco::toUpper(identifier->alias) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort a column alias with name 'all'"); - } - } - if (auto * function = expr->as(); function != nullptr) - if (Poco::toUpper(function->alias) == "ALL") - throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Cannot use ORDER BY ALL to sort an expression with name 'all'"); - auto elem = std::make_shared(); elem->direction = all_elem->direction; elem->nulls_direction = all_elem->nulls_direction; @@ -1330,7 +1313,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if (select_query->group_by_all) expandGroupByAll(select_query); - // expand ORDER BY ALL + // expand ORDER BY * if (select_query->order_by_all) expandOrderByAll(select_query); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 2115de1c124..d38e0933981 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ 
b/src/Parsers/ASTSelectQuery.cpp @@ -165,7 +165,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (order_by_all) { - s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY ALL" << (s.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY *" << (s.hilite ? hilite_none : ""); auto * elem = orderBy()->children[0]->as(); s.ostr << (s.hilite ? hilite_keyword : "") diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 641e74b5f18..6397a2a2a55 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -1,21 +1,23 @@ -#include +#include + +#include +#include +#include +#include #include +#include #include -#include #include #include #include -#include +#include #include -#include +#include #include #include -#include -#include -#include -#include #include +#include namespace DB { @@ -290,9 +292,9 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } else if (order_expression_list->children.size() == 1) { - /// ORDER BY ALL - auto * identifier = order_expression_list->children[0]->as()->children[0]->as(); - if (identifier != nullptr && Poco::toUpper(identifier->name()) == "ALL") + /// ORDER BY * + auto * asterisk = order_expression_list->children[0]->as()->children[0]->as(); + if (asterisk != nullptr) select_query->order_by_all = true; } } diff --git a/tests/queries/0_stateless/02567_and_consistency.sql b/tests/queries/0_stateless/02567_and_consistency.sql index 0eeab99e539..b1fa526e33f 100644 --- a/tests/queries/0_stateless/02567_and_consistency.sql +++ b/tests/queries/0_stateless/02567_and_consistency.sql @@ -5,7 +5,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) -ORDER BY ALL +ORDER BY * SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -17,7 +17,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(1) -ORDER BY ALL +ORDER BY * SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -29,7 +29,7 @@ FROM ) GROUP BY number HAVING x AND sin(sum(number)) -ORDER BY ALL +ORDER BY * SETTINGS enable_optimize_predicate_expression = 1; SELECT '====='; @@ -41,7 +41,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) -ORDER BY ALL +ORDER BY * SETTINGS enable_optimize_predicate_expression = 0; SELECT '====='; @@ -61,7 +61,7 @@ FROM ) GROUP BY number HAVING 1 AND sin(sum(number)) -ORDER BY ALL +ORDER BY * SETTINGS enable_optimize_predicate_expression = 1; select '#45440'; diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index fddbf41f0e5..95604c6f401 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -29,13 +29,13 @@ CREATE TABLE t INSERT INTO t VALUES ('', '') ('abc', '') ('', 'abc') ('abc', 'abc') ('abc', 'ab') ('abc', 'bc') ('clickhouse', 'mouse'); SELECT '-- non-const arguments'; -SELECT 'byteHammingDistance', s1, s2, byteHammingDistance(s1, s2) FROM t ORDER BY ALL; -SELECT 'editDistance', s1, s2, editDistance(s1, s2) FROM t ORDER BY ALL; -SELECT 'damerauLevenshteinDistance', s1, s2, damerauLevenshteinDistance(s1, s2) FROM t ORDER BY ALL; -SELECT 'stringJaccardIndex', s1, s2, stringJaccardIndex(s1, s2) FROM t ORDER BY ALL; -SELECT 'stringJaccardIndexUTF8', s1, s2, stringJaccardIndexUTF8(s1, s2) FROM t ORDER BY ALL; -SELECT 'jaroSimilarity', s1, s2, jaroSimilarity(s1, 
s2) FROM t ORDER BY ALL; -SELECT 'jaroWinklerSimilarity', s1, s2, jaroWinklerSimilarity(s1, s2) FROM t ORDER BY ALL; +SELECT 'byteHammingDistance', s1, s2, byteHammingDistance(s1, s2) FROM t ORDER BY *; +SELECT 'editDistance', s1, s2, editDistance(s1, s2) FROM t ORDER BY *; +SELECT 'damerauLevenshteinDistance', s1, s2, damerauLevenshteinDistance(s1, s2) FROM t ORDER BY *; +SELECT 'stringJaccardIndex', s1, s2, stringJaccardIndex(s1, s2) FROM t ORDER BY *; +SELECT 'stringJaccardIndexUTF8', s1, s2, stringJaccardIndexUTF8(s1, s2) FROM t ORDER BY *; +SELECT 'jaroSimilarity', s1, s2, jaroSimilarity(s1, s2) FROM t ORDER BY *; +SELECT 'jaroWinklerSimilarity', s1, s2, jaroWinklerSimilarity(s1, s2) FROM t ORDER BY *; SELECT '-- Special UTF-8 tests'; -- We do not perform full UTF8 validation, so sometimes it just returns some result diff --git a/tests/queries/0_stateless/02943_order_by_all.reference b/tests/queries/0_stateless/02943_order_by_all.reference index d91f6dfc4a5..ef399fe8e2d 100644 --- a/tests/queries/0_stateless/02943_order_by_all.reference +++ b/tests/queries/0_stateless/02943_order_by_all.reference @@ -49,39 +49,9 @@ A 2 2 A 3 B \N C --- "ALL" in ORDER BY is case-insensitive +-- Special case: all columns in SELECT clause, ORDER BY * A 2 B 3 C \N D 1 -A 2 -B 3 -C \N -D 1 -A 2 -B 3 -C \N -D 1 -A 2 -B 3 -C \N -D 1 --- If "all" (case-insensitive) appears in the SELECT clause, throw an error because of ambiguity --- If ORDER BY contains "ALL" plus other columns, then "ALL" loses its special meaning -B 3 10 -D 1 20 -A 2 30 -C \N 40 -B 3 10 -D 1 20 -A 2 30 -C \N 40 --- test SELECT * ORDER BY ALL (only works if the SELECT column contains no "all" column) -A 2 30 -B 3 10 -C \N 40 -D 1 20 -A 2 30 -B 3 10 -C \N 40 -D 1 20 +-- "*" must appear stand-alone in ORDER BY diff --git a/tests/queries/0_stateless/02943_order_by_all.sql b/tests/queries/0_stateless/02943_order_by_all.sql index f10184e79b9..2fe628e9b95 100644 --- a/tests/queries/0_stateless/02943_order_by_all.sql +++ b/tests/queries/0_stateless/02943_order_by_all.sql @@ -1,4 +1,4 @@ --- Tests that sort expression ORDER BY ALL +-- Tests that sort expression ORDER BY * DROP TABLE IF EXISTS order_by_all; @@ -6,104 +6,48 @@ CREATE TABLE order_by_all ( a String, b Nullable(Int32), - all UInt64, ) ENGINE = Memory; -INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); +INSERT INTO order_by_all VALUES ('B', 3), ('C', NULL), ('D', 1), ('A', 2); SELECT '-- no modifiers'; SET allow_experimental_analyzer = 0; -SELECT a, b FROM order_by_all ORDER BY ALL; -SELECT b, a FROM order_by_all ORDER BY ALL; +SELECT a, b FROM order_by_all ORDER BY *; +SELECT b, a FROM order_by_all ORDER BY *; SET allow_experimental_analyzer = 1; -SELECT a, b FROM order_by_all ORDER BY ALL; -SELECT b, a FROM order_by_all ORDER BY ALL; +SELECT a, b FROM order_by_all ORDER BY *; +SELECT b, a FROM order_by_all ORDER BY *; SELECT '-- with ASC/DESC modifiers'; SET allow_experimental_analyzer = 0; -SELECT a, b FROM order_by_all ORDER BY ALL ASC; -SELECT a, b FROM order_by_all ORDER BY ALL DESC; +SELECT a, b FROM order_by_all ORDER BY * ASC; +SELECT a, b FROM order_by_all ORDER BY * DESC; SET allow_experimental_analyzer = 1; -SELECT a, b FROM order_by_all ORDER BY ALL ASC; -SELECT a, b FROM order_by_all ORDER BY ALL DESC; +SELECT a, b FROM order_by_all ORDER BY * ASC; +SELECT a, b FROM order_by_all ORDER BY * DESC; SELECT '-- with NULLS FIRST/LAST modifiers'; SET allow_experimental_analyzer = 0; -SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; 
-SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; +SELECT b, a FROM order_by_all ORDER BY * NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY * NULLS LAST; SET allow_experimental_analyzer = 1; -SELECT b, a FROM order_by_all ORDER BY ALL NULLS FIRST; -SELECT b, a FROM order_by_all ORDER BY ALL NULLS LAST; +SELECT b, a FROM order_by_all ORDER BY * NULLS FIRST; +SELECT b, a FROM order_by_all ORDER BY * NULLS LAST; -SELECT '-- "ALL" in ORDER BY is case-insensitive'; +SELECT '-- Special case: all columns in SELECT clause, ORDER BY *'; +SELECT * FROM order_by_all ORDER BY * NULLS LAST; + +SELECT '-- "*" must appear stand-alone in ORDER BY'; SET allow_experimental_analyzer = 0; -SELECT a, b FROM order_by_all ORDER BY ALL; -SELECT a, b FROM order_by_all ORDER BY all; +SELECT a, b FROM order_by_all ORDER BY *, a; -- { serverError UNKNOWN_IDENTIFIER } SET allow_experimental_analyzer = 1; -SELECT a, b FROM order_by_all ORDER BY ALL; -SELECT a, b FROM order_by_all ORDER BY all; - -SELECT '-- If "all" (case-insensitive) appears in the SELECT clause, throw an error because of ambiguity'; - --- columns - -SET allow_experimental_analyzer = 0; -SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - -SET allow_experimental_analyzer = 1; -SELECT a, b, all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - --- column aliases - -SET allow_experimental_analyzer = 0; -SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - -SET allow_experimental_analyzer = 1; -SELECT a, b AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - --- expressions - -SET allow_experimental_analyzer = 0; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - -SET allow_experimental_analyzer = 1; -SELECT format('{} {}', a, b) AS all FROM order_by_all ORDER BY ALL; -- { serverError UNEXPECTED_EXPRESSION } - -SELECT '-- If ORDER BY contains "ALL" plus other columns, then "ALL" loses its special meaning'; - -SET allow_experimental_analyzer = 0; -SELECT a, b, all FROM order_by_all ORDER BY all, a; - -SET allow_experimental_analyzer = 1; -SELECT a, b, all FROM order_by_all ORDER BY all, a; - -DROP TABLE order_by_all; - -SELECT '-- test SELECT * ORDER BY ALL (only works if the SELECT column contains no "all" column)'; - -CREATE TABLE order_by_all -( - a String, - b Nullable(Int32), - c UInt64, -) -ENGINE = Memory; - -INSERT INTO order_by_all VALUES ('B', 3, 10), ('C', NULL, 40), ('D', 1, 20), ('A', 2, 30); - -SET allow_experimental_analyzer = 0; -SELECT * FROM order_by_all ORDER BY ALL; - -SET allow_experimental_analyzer = 1; -SELECT * FROM order_by_all ORDER BY ALL; - -DROP TABLE order_by_all; +SELECT a, b FROM order_by_all ORDER BY *, a; -- { serverError UNSUPPORTED_METHOD } diff --git a/tests/queries/0_stateless/02962_join_using_bug_57894.sql b/tests/queries/0_stateless/02962_join_using_bug_57894.sql index 87aef8b1a71..c9570be7053 100644 --- a/tests/queries/0_stateless/02962_join_using_bug_57894.sql +++ b/tests/queries/0_stateless/02962_join_using_bug_57894.sql @@ -11,23 +11,23 @@ INSERT INTO r VALUES (NULL, NULL); SET allow_experimental_analyzer = 0; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SELECT x FROM t FULL JOIN r USING (x) ORDER BY * ; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SELECT x FROM t FULL JOIN r USING (x) ORDER BY * SETTINGS join_algorithm = 'partial_merge'; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL 
+SELECT x FROM t FULL JOIN r USING (x) ORDER BY * SETTINGS join_algorithm = 'full_sorting_merge'; SET allow_experimental_analyzer = 1; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SELECT x FROM t FULL JOIN r USING (x) ORDER BY * ; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SELECT x FROM t FULL JOIN r USING (x) ORDER BY * SETTINGS join_algorithm = 'partial_merge'; -SELECT x FROM t FULL JOIN r USING (x) ORDER BY ALL +SELECT x FROM t FULL JOIN r USING (x) ORDER BY * SETTINGS join_algorithm = 'full_sorting_merge'; From 365530c463de4b18c15e94b04bc28e84ed7e0002 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 15 Feb 2024 17:12:22 +0000 Subject: [PATCH 056/145] Fix 01656_test_query_log_factories_info with analyzer. --- src/Analyzer/Passes/ArrayExistsToHasPass.cpp | 4 ++- src/Analyzer/Passes/CNF.cpp | 29 ++++++++++--------- src/Analyzer/Passes/CNF.h | 2 +- .../Passes/ConvertOrLikeChainPass.cpp | 8 +++-- src/Analyzer/Passes/ConvertQueryToCNFPass.cpp | 4 +-- src/Analyzer/Passes/CrossToInnerJoinPass.cpp | 3 +- src/Analyzer/Passes/IfChainToMultiIfPass.cpp | 4 ++- src/Analyzer/Passes/MultiIfToIfPass.cpp | 4 ++- src/Functions/FunctionsLogical.cpp | 17 +++++++++++ src/Functions/array/has.cpp | 6 ++++ src/Functions/array/has.h | 12 ++++++++ src/Functions/functionsLogical.h | 15 ++++++++++ src/Functions/if.cpp | 5 ++++ src/Functions/if.h | 12 ++++++++ src/Functions/multiIf.cpp | 26 +++++++++++++---- src/Functions/multiIf.h | 12 ++++++++ src/Functions/multiMatchAny.cpp | 5 ++++ src/Functions/multiMatchAny.h | 12 ++++++++ .../01656_test_query_log_factories_info.sql | 4 ++- 19 files changed, 154 insertions(+), 30 deletions(-) create mode 100644 src/Functions/array/has.h create mode 100644 src/Functions/functionsLogical.h create mode 100644 src/Functions/if.h create mode 100644 src/Functions/multiIf.h create mode 100644 src/Functions/multiMatchAny.h diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index 36c3df4d93a..62db502e1dc 100644 --- a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -1,6 +1,7 @@ #include #include +#include #include @@ -83,7 +84,8 @@ public: return; } - auto has_function = FunctionFactory::instance().get("has", getContext()); + auto has_function = createInternalFunctionHasOverloadResolver(); + array_exists_function_arguments_nodes[0] = std::move(array_exists_function_arguments_nodes[1]); array_exists_function_arguments_nodes[1] = std::move(has_constant_element_argument); array_exists_function_node->resolveAsFunction(has_function->build(array_exists_function_node->getArgumentColumns())); diff --git a/src/Analyzer/Passes/CNF.cpp b/src/Analyzer/Passes/CNF.cpp index aa6ee539934..1cde190606a 100644 --- a/src/Analyzer/Passes/CNF.cpp +++ b/src/Analyzer/Passes/CNF.cpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -79,7 +80,7 @@ public: if (name == "and" || name == "or") { - auto function_resolver = FunctionFactory::instance().get(name, current_context); + auto function_resolver = name == "and" ? 
createInternalFunctionAndOverloadResolver() : createInternalFunctionOrOverloadResolver(); const auto & arguments = function_node->getArguments().getNodes(); if (arguments.size() > 2) @@ -110,10 +111,10 @@ private: class PushNotVisitor { public: - explicit PushNotVisitor(const ContextPtr & context) - : not_function_resolver(FunctionFactory::instance().get("not", context)) - , or_function_resolver(FunctionFactory::instance().get("or", context)) - , and_function_resolver(FunctionFactory::instance().get("and", context)) + explicit PushNotVisitor() + : not_function_resolver(createInternalFunctionNotOverloadResolver()) + , or_function_resolver(createInternalFunctionOrOverloadResolver()) + , and_function_resolver(createInternalFunctionAndOverloadResolver()) {} void visit(QueryTreeNodePtr & node, bool add_negation) @@ -162,10 +163,10 @@ private: class PushOrVisitor { public: - PushOrVisitor(ContextPtr context, size_t max_atoms_) + PushOrVisitor(size_t max_atoms_) : max_atoms(max_atoms_) - , and_resolver(FunctionFactory::instance().get("and", context)) - , or_resolver(FunctionFactory::instance().get("or", context)) + , and_resolver(createInternalFunctionAndOverloadResolver()) + , or_resolver(createInternalFunctionOrOverloadResolver()) {} bool visit(QueryTreeNodePtr & node, size_t num_atoms) @@ -513,11 +514,11 @@ std::optional CNF::tryBuildCNF(const QueryTreeNodePtr & node, ContextPtr co } { - PushNotVisitor visitor(context); + PushNotVisitor visitor; visitor.visit(node_cloned, false); } - if (PushOrVisitor visitor(context, max_atoms); + if (PushOrVisitor visitor(max_atoms); !visitor.visit(node_cloned, atom_count)) return std::nullopt; @@ -542,7 +543,7 @@ CNF CNF::toCNF(const QueryTreeNodePtr & node, ContextPtr context, size_t max_gro return *cnf; } -QueryTreeNodePtr CNF::toQueryTree(ContextPtr context) const +QueryTreeNodePtr CNF::toQueryTree() const { if (statements.empty()) return nullptr; @@ -550,9 +551,9 @@ QueryTreeNodePtr CNF::toQueryTree(ContextPtr context) const QueryTreeNodes and_arguments; and_arguments.reserve(statements.size()); - auto not_resolver = FunctionFactory::instance().get("not", context); - auto or_resolver = FunctionFactory::instance().get("or", context); - auto and_resolver = FunctionFactory::instance().get("and", context); + auto not_resolver = createInternalFunctionNotOverloadResolver(); + auto or_resolver = createInternalFunctionOrOverloadResolver(); + auto and_resolver = createInternalFunctionAndOverloadResolver(); const auto function_node_from_atom = [&](const auto & atom) -> QueryTreeNodePtr { diff --git a/src/Analyzer/Passes/CNF.h b/src/Analyzer/Passes/CNF.h index ec639cd6679..9325d97d2f2 100644 --- a/src/Analyzer/Passes/CNF.h +++ b/src/Analyzer/Passes/CNF.h @@ -54,7 +54,7 @@ public: static std::optional tryBuildCNF(const QueryTreeNodePtr & node, ContextPtr context, size_t max_growth_multiplier = DEFAULT_MAX_GROWTH_MULTIPLIER); static CNF toCNF(const QueryTreeNodePtr & node, ContextPtr context, size_t max_growth_multiplier = DEFAULT_MAX_GROWTH_MULTIPLIER); - QueryTreeNodePtr toQueryTree(ContextPtr context) const; + QueryTreeNodePtr toQueryTree() const; const auto & getStatements() const { diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 905819bf49f..162948edc57 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -11,6 +11,8 @@ #include #include +#include +#include #include @@ -134,8 +136,10 @@ private: void 
ConvertOrLikeChainPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - auto or_function_resolver = FunctionFactory::instance().get("or", context); - auto match_function_resolver = FunctionFactory::instance().get("multiMatchAny", context); + const auto & settings = context->getSettingsRef(); + auto match_function_resolver = createInternalMultiMatchAnyOverloadResolver(settings.allow_hyperscan, settings.max_hyperscan_regexp_length, settings.max_hyperscan_regexp_total_length, settings.reject_expensive_hyperscan_regexps); + auto or_function_resolver = createInternalFunctionOrOverloadResolver(); + ConvertOrLikeChainVisitor visitor(std::move(or_function_resolver), std::move(match_function_resolver), std::move(context)); visitor.visit(query_tree_node); } diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 5ce1ea43f2f..96bc62212fd 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -339,7 +339,7 @@ void addIndexConstraint(Analyzer::CNF & cnf, const QueryTreeNodes & table_expres { Analyzer::CNF::OrGroup new_group; auto index_hint_node = std::make_shared("indexHint"); - index_hint_node->getArguments().getNodes().push_back(Analyzer::CNF{std::move(and_group)}.toQueryTree(context)); + index_hint_node->getArguments().getNodes().push_back(Analyzer::CNF{std::move(and_group)}.toQueryTree()); index_hint_node->resolveAsFunction(FunctionFactory::instance().get("indexHint", context)); new_group.insert({false, QueryTreeNodePtrWithHash{std::move(index_hint_node)}}); @@ -676,7 +676,7 @@ void optimizeNode(QueryTreeNodePtr & node, const QueryTreeNodes & table_expressi if (settings.optimize_using_constraints) optimizeWithConstraints(*cnf, table_expressions, context); - auto new_node = cnf->toQueryTree(context); + auto new_node = cnf->toQueryTree(); node = std::move(new_node); } diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index 154babf3d9a..9bbf3aad15d 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -12,6 +12,7 @@ #include #include +#include #include @@ -256,7 +257,7 @@ private: for (const auto & node : nodes) function_node->getArguments().getNodes().push_back(node); - const auto & function = FunctionFactory::instance().get("and", getContext()); + const auto & function = createInternalFunctionAndOverloadResolver(); function_node->resolveAsFunction(function->build(function_node->getArgumentColumns())); return function_node; } diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 88e350ffa2e..70b717f3108 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -75,7 +76,8 @@ private: void IfChainToMultiIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - auto multi_if_function_ptr = FunctionFactory::instance().get("multiIf", context); + const auto & settings = context->getSettingsRef(); + auto multi_if_function_ptr = createInternalMultiIfOverloadResolver(settings.allow_execute_multiif_columnar, settings.allow_experimental_variant_type, settings.use_variant_as_common_type); IfChainToMultiIfPassVisitor visitor(std::move(multi_if_function_ptr), std::move(context)); visitor.visit(query_tree_node); } diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp 
b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 8e09d5cab38..c42ea61b34a 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -54,7 +55,8 @@ private: void MultiIfToIfPass::run(QueryTreeNodePtr & query_tree_node, ContextPtr context) { - auto if_function_ptr = FunctionFactory::instance().get("if", context); + const auto & settings = context->getSettingsRef(); + auto if_function_ptr = createInternalFunctionIfOverloadResolver(settings.allow_experimental_variant_type, settings.use_variant_as_common_type); MultiIfToIfVisitor visitor(std::move(if_function_ptr), std::move(context)); visitor.visit(query_tree_node); } diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index d01fdc99076..380f2260ed8 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -776,4 +776,21 @@ ColumnPtr FunctionUnaryLogical::executeImpl(const ColumnsWithTypeAnd return res; } +FunctionOverloadResolverPtr createInternalFunctionOrOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} +FunctionOverloadResolverPtr createInternalFunctionAndOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} +FunctionOverloadResolverPtr createInternalFunctionXorOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} +FunctionOverloadResolverPtr createInternalFunctionNotOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} + } diff --git a/src/Functions/array/has.cpp b/src/Functions/array/has.cpp index f08a4f29d2d..a17dcdcfbf9 100644 --- a/src/Functions/array/has.cpp +++ b/src/Functions/array/has.cpp @@ -9,4 +9,10 @@ struct NameHas { static constexpr auto name = "has"; }; using FunctionHas = FunctionArrayIndex; REGISTER_FUNCTION(Has) { factory.registerFunction(); } + +FunctionOverloadResolverPtr createInternalFunctionHasOverloadResolver() +{ + return std::make_unique(std::make_shared()); +} + } diff --git a/src/Functions/array/has.h b/src/Functions/array/has.h new file mode 100644 index 00000000000..226662d4051 --- /dev/null +++ b/src/Functions/array/has.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionHasOverloadResolver(); + +} diff --git a/src/Functions/functionsLogical.h b/src/Functions/functionsLogical.h new file mode 100644 index 00000000000..d2d07f6cec7 --- /dev/null +++ b/src/Functions/functionsLogical.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionOrOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionAndOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionXorOverloadResolver(); +FunctionOverloadResolverPtr createInternalFunctionNotOverloadResolver(); + +} diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 7306dc4173e..70aced8842a 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1413,4 +1413,9 @@ REGISTER_FUNCTION(If) factory.registerFunction({}, FunctionFactory::CaseInsensitive); } +FunctionOverloadResolverPtr createInternalFunctionIfOverloadResolver(bool allow_experimental_variant_type, bool use_variant_as_common_type) +{ + return std::make_unique(std::make_shared(allow_experimental_variant_type && 
use_variant_as_common_type)); +} + } diff --git a/src/Functions/if.h b/src/Functions/if.h new file mode 100644 index 00000000000..09a7a6a3e78 --- /dev/null +++ b/src/Functions/if.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalFunctionIfOverloadResolver(bool allow_experimental_variant_type, bool use_variant_as_common_type); + +} diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index cb946b55c73..af7afb75e1a 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -40,9 +40,17 @@ class FunctionMultiIf final : public FunctionIfBase { public: static constexpr auto name = "multiIf"; - static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + static FunctionPtr create(ContextPtr context_) + { + const auto & settings = context_->getSettingsRef(); + return std::make_shared(settings.allow_execute_multiif_columnar, settings.allow_experimental_variant_type, settings.use_variant_as_common_type); + } - explicit FunctionMultiIf(ContextPtr context_) : context(context_) { } + explicit FunctionMultiIf(bool allow_execute_multiif_columnar_, bool allow_experimental_variant_type_, bool use_variant_as_common_type_) + : allow_execute_multiif_columnar(allow_execute_multiif_columnar_) + , allow_experimental_variant_type(allow_experimental_variant_type_) + , use_variant_as_common_type(use_variant_as_common_type_) + {} String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -118,7 +126,7 @@ public: types_of_branches.emplace_back(arg); }); - if (context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type) + if (allow_experimental_variant_type && use_variant_as_common_type) return getLeastSupertypeOrVariant(types_of_branches); return getLeastSupertype(types_of_branches); @@ -240,10 +248,9 @@ public: } } - const auto & settings = context->getSettingsRef(); const WhichDataType which(removeNullable(result_type)); bool execute_multiif_columnar - = settings.allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); + = allow_execute_multiif_columnar && !contains_short && (which.isInt() || which.isUInt() || which.isFloat()); size_t rows = input_rows_count; if (!execute_multiif_columnar) @@ -507,7 +514,9 @@ private: executeColumnIfNeeded(arguments[i], true); } - ContextPtr context; + const bool allow_execute_multiif_columnar; + const bool allow_experimental_variant_type; + const bool use_variant_as_common_type; }; } @@ -521,6 +530,11 @@ REGISTER_FUNCTION(MultiIf) factory.registerFunction("caseWithoutExpression"); } +FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool allow_experimental_variant_type, bool use_variant_as_common_type) +{ + return std::make_unique(std::make_shared(allow_execute_multiif_columnar, allow_experimental_variant_type, use_variant_as_common_type)); +} + } diff --git a/src/Functions/multiIf.h b/src/Functions/multiIf.h new file mode 100644 index 00000000000..617d63b89bc --- /dev/null +++ b/src/Functions/multiIf.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool 
allow_experimental_variant_type, bool use_variant_as_common_type); + +} diff --git a/src/Functions/multiMatchAny.cpp b/src/Functions/multiMatchAny.cpp index 6e6abe61898..054a60fce2d 100644 --- a/src/Functions/multiMatchAny.cpp +++ b/src/Functions/multiMatchAny.cpp @@ -22,4 +22,9 @@ REGISTER_FUNCTION(MultiMatchAny) factory.registerFunction(); } +FunctionOverloadResolverPtr createInternalMultiMatchAnyOverloadResolver(bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, bool reject_expensive_hyperscan_regexps) +{ + return std::make_unique(std::make_shared(allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length, reject_expensive_hyperscan_regexps)); +} + } diff --git a/src/Functions/multiMatchAny.h b/src/Functions/multiMatchAny.h new file mode 100644 index 00000000000..4548ec1d593 --- /dev/null +++ b/src/Functions/multiMatchAny.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace DB +{ + +class IFunctionOverloadResolver; +using FunctionOverloadResolverPtr = std::shared_ptr; + +FunctionOverloadResolverPtr createInternalMultiMatchAnyOverloadResolver(bool allow_hyperscan, size_t max_hyperscan_regexp_length, size_t max_hyperscan_regexp_total_length, bool reject_expensive_hyperscan_regexps); + +} diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql index 020d7cc5e72..8a6b604b053 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql @@ -41,7 +41,9 @@ FROM system.query_log WHERE current_database = currentDatabase() AND type = 'Que ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; -SELECT arraySort(used_functions) +-- 1. analyzer includes arrayJoin into functions list +-- 2. for crc32 (CaseInsensitive function) we use lower case now +SELECT arraySort(arrayMap(x -> x == 'crc32' ? 'CRC32' : x, arrayFilter(x-> x != 'arrayJoin', used_functions))) as `arraySort(used_functions)` FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' AND (query LIKE '%toDate(\'2000-12-05\')%') ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; From 819effb6db1db310a0644292277ebf9bdcc7472e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 15 Feb 2024 17:15:06 +0000 Subject: [PATCH 057/145] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 29331d674c8..cec528fc68e 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,3 @@ -00223_shard_distributed_aggregation_memory_efficient 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation From ef7e8e0c5db28c4d6e01927e6fa9593d1f63cfcc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 15 Feb 2024 17:35:44 +0000 Subject: [PATCH 058/145] Fixing style. 
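
This style fix belongs to the same change as patches 056 and 057: analyzer passes stop calling FunctionFactory::instance().get(name, context) and instead use dedicated helpers such as createInternalFunctionOrOverloadResolver() or createInternalMultiMatchAnyOverloadResolver(...), so a pass no longer needs a ContextPtr just to resolve a fixed function; whichever settings actually matter (allow_hyperscan, use_variant_as_common_type, ...) are passed as plain arguments. The sketch below only illustrates that design choice; every type and name in it is a simplified stand-in, not a real ClickHouse class.

    #include <functional>
    #include <iostream>
    #include <map>
    #include <memory>
    #include <string>

    /// Simplified stand-ins -- these are not the real ClickHouse classes.
    struct Resolver { std::string name; };
    using ResolverPtr = std::shared_ptr<Resolver>;
    struct Context { bool allow_hyperscan = true; };   /// imagine dozens of settings

    /// Old style: a name-keyed registry, so every caller has to carry a Context.
    ResolverPtr getFromFactory(const std::string & name, const Context &)
    {
        static const std::map<std::string, std::function<ResolverPtr()>> registry
            = {{"or", [] { return std::make_shared<Resolver>(Resolver{"or"}); }}};
        return registry.at(name)();
    }

    /// New style: a dedicated factory with an explicit, minimal signature.
    ResolverPtr createInternalOrResolver()
    {
        return std::make_shared<Resolver>(Resolver{"or"});
    }

    int main()
    {
        Context context;
        std::cout << getFromFactory("or", context)->name << '\n';  /// registry lookup
        std::cout << createInternalOrResolver()->name << '\n';     /// direct factory
    }

The dedicated factory's explicit signature is what lets the hunks above drop the ContextPtr from PushNotVisitor, PushOrVisitor and CNF::toQueryTree().
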
--- src/Analyzer/Passes/CNF.cpp | 2 +- src/Analyzer/Passes/ConvertOrLikeChainPass.cpp | 2 +- src/Analyzer/Passes/CrossToInnerJoinPass.cpp | 2 +- src/Functions/FunctionsLogical.cpp | 1 + src/Functions/{functionsLogical.h => logical.h} | 0 5 files changed, 4 insertions(+), 3 deletions(-) rename src/Functions/{functionsLogical.h => logical.h} (100%) diff --git a/src/Analyzer/Passes/CNF.cpp b/src/Analyzer/Passes/CNF.cpp index 1cde190606a..5cb79011856 100644 --- a/src/Analyzer/Passes/CNF.cpp +++ b/src/Analyzer/Passes/CNF.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 162948edc57..eb897ef8746 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index 9bbf3aad15d..d0a5656d334 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 380f2260ed8..d0795941e1f 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Functions/functionsLogical.h b/src/Functions/logical.h similarity index 100% rename from src/Functions/functionsLogical.h rename to src/Functions/logical.h From 2bda56d2003fa9085564a256ac6cae94419d2db9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 15 Feb 2024 17:56:15 +0000 Subject: [PATCH 059/145] Fixing analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index cec528fc68e..d6408ab712d 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,3 +1,4 @@ +00223_shard_distributed_aggregation_memory_efficien 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation @@ -6,7 +7,6 @@ 01244_optimize_distributed_group_by_sharding_key 01584_distributed_buffer_cannot_find_column 01624_soft_constraints -01656_test_query_log_factories_info 01747_join_view_filter_dictionary 01761_cast_to_enum_nullable 01925_join_materialized_columns From 1dac0246a31edc0b2416dc75dabcfe79af6814c8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Feb 2024 14:10:28 +0100 Subject: [PATCH 060/145] Remove extra empty line --- src/DataTypes/DataTypeTuple.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index 26a871182a7..eb218d8efb7 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -385,7 +385,6 @@ void DataTypeTuple::forEachChild(const ChildCallback & callback) const } } - static DataTypePtr create(const ASTPtr & arguments) { if (!arguments || arguments->children.empty()) From b85a68790aa37b06b8ce3cfe80b9e232315053a9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 16 Feb 2024 14:39:41 +0000 Subject: [PATCH 061/145] Cleanup: connection pool priority -> config priority - names were creating confusion between config priority and balancing priority for a reader --- 
programs/benchmark/Benchmark.cpp | 1 - src/Client/ConnectionPool.h | 65 +++++++++---------- src/Client/ConnectionPoolWithFailover.cpp | 8 --- src/Client/ConnectionPoolWithFailover.h | 2 - src/Common/PoolWithFailoverBase.h | 2 +- .../DistributedAsyncInsertDirectoryQueue.h | 2 - 6 files changed, 32 insertions(+), 48 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 961c678b936..fac88c0621f 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 8e707e8190f..574c4992d75 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -28,7 +28,10 @@ public: using Entry = PoolBase::Entry; IConnectionPool() = default; - IConnectionPool(String host_, UInt16 port_) : host(host_), port(port_), address(host + ":" + toString(port_)) {} + IConnectionPool(String host_, UInt16 port_, Priority config_priority_) + : host(host_), port(port_), address(host + ":" + toString(port_)), config_priority(config_priority_) + { + } virtual ~IConnectionPool() = default; @@ -42,12 +45,13 @@ public: const std::string & getHost() const { return host; } UInt16 getPort() const { return port; } const String & getAddress() const { return address; } - virtual Priority getPriority() const { return Priority{1}; } + Priority getConfigPriority() const { return config_priority; } protected: const String host; const UInt16 port = 0; const String address; + const Priority config_priority; }; using ConnectionPoolPtr = std::shared_ptr; @@ -61,32 +65,31 @@ public: using Entry = IConnectionPool::Entry; using Base = PoolBase; - ConnectionPool(unsigned max_connections_, - const String & host_, - UInt16 port_, - const String & default_database_, - const String & user_, - const String & password_, - const String & quota_key_, - const String & cluster_, - const String & cluster_secret_, - const String & client_name_, - Protocol::Compression compression_, - Protocol::Secure secure_, - Priority priority_ = Priority{1}) - : IConnectionPool(host_, port_), - Base(max_connections_, - getLogger("ConnectionPool (" + host_ + ":" + toString(port_) + ")")), - default_database(default_database_), - user(user_), - password(password_), - quota_key(quota_key_), - cluster(cluster_), - cluster_secret(cluster_secret_), - client_name(client_name_), - compression(compression_), - secure(secure_), - priority(priority_) + ConnectionPool( + unsigned max_connections_, + const String & host_, + UInt16 port_, + const String & default_database_, + const String & user_, + const String & password_, + const String & quota_key_, + const String & cluster_, + const String & cluster_secret_, + const String & client_name_, + Protocol::Compression compression_, + Protocol::Secure secure_, + Priority config_priority_ = Priority{1}) + : IConnectionPool(host_, port_, config_priority_) + , Base(max_connections_, getLogger("ConnectionPool (" + host_ + ":" + toString(port_) + ")")) + , default_database(default_database_) + , user(user_) + , password(password_) + , quota_key(quota_key_) + , cluster(cluster_) + , cluster_secret(cluster_secret_) + , client_name(client_name_) + , compression(compression_) + , secure(secure_) { } @@ -114,11 +117,6 @@ public: return host + ":" + toString(port); } - Priority getPriority() const override - { - return priority; - } - protected: /** Creates a new object to put in the pool. 
*/ ConnectionPtr allocObject() override @@ -143,7 +141,6 @@ private: String client_name; Protocol::Compression compression; /// Whether to compress data when interacting with the server. Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. - Priority priority; /// priority from }; /** diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index fdc0a11e533..4c91f64eb40 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -79,14 +79,6 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts return Base::get(max_ignored_errors, fallback_to_stale_replicas, try_get_entry, get_priority); } -Priority ConnectionPoolWithFailover::getPriority() const -{ - return (*std::max_element(nested_pools.begin(), nested_pools.end(), [](const auto & a, const auto & b) - { - return a->getPriority() < b->getPriority(); - }))->getPriority(); -} - ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const { const auto [states, pools, error_decrease_time] = getPoolExtendedStates(); diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 7ccdd4787a4..49b988eb0b3 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -49,8 +49,6 @@ public: const Settings & settings, bool force_connected) override; /// From IConnectionPool - Priority getPriority() const override; /// From IConnectionPool - /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. */ diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 8fd83300eff..2f4223e0e61 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -66,7 +66,7 @@ public: , log(log_) { for (size_t i = 0;i < nested_pools.size(); ++i) - shared_pool_states[i].config_priority = nested_pools[i]->getPriority(); + shared_pool_states[i].config_priority = nested_pools[i]->getConfigPriority(); } struct TryResult diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h index f7d7553851a..a1b436bb9c8 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.h @@ -6,9 +6,7 @@ #include #include #include -#include #include -#include namespace CurrentMetrics { class Increment; } From b01ccbbc8800d497defd968568bd0b675f3d7495 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 16 Feb 2024 10:21:40 +0100 Subject: [PATCH 062/145] Fix --- src/Coordination/Changelog.cpp | 22 +++++++++++-------- src/Coordination/Changelog.h | 2 +- src/Coordination/tests/gtest_coordination.cpp | 4 +--- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 63bfb709125..d743801870a 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -719,10 +719,10 @@ void LogEntryStorage::prefetchCommitLogs() { for (const auto & prefetch_file_info : prefetch_info->file_infos) { - const auto & [changelog_description, position, count] = prefetch_file_info; - changelog_description->withLock( + prefetch_file_info.file_description->withLock( [&] { + const auto & [changelog_description, position, count] = prefetch_file_info; auto file = 
changelog_description->disk->readFile(changelog_description->path, ReadSettings()); file->seek(position, SEEK_SET); LOG_TRACE( @@ -1020,7 +1020,6 @@ void LogEntryStorage::updateTermInfoWithNewEntry(uint64_t index, uint64_t term) if (!log_term_infos.empty() && log_term_infos.back().term == term) return; - chassert(log_term_infos.empty() || log_term_infos.back().term == term - 1); log_term_infos.push_back(LogTermInfo{.term = term, .first_index = index}); } @@ -1224,10 +1223,10 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const } else if (auto it = logs_location.find(index); it != logs_location.end()) { - const auto & [changelog_description, position, size] = it->second; - changelog_description->withLock( + it->second.file_description->withLock( [&] { + const auto & [changelog_description, position, size] = it->second; auto file = changelog_description->disk->readFile(changelog_description->path, ReadSettings()); file->seek(position, SEEK_SET); LOG_TRACE( @@ -1282,6 +1281,10 @@ uint64_t LogEntryStorage::termAt(uint64_t index) const void LogEntryStorage::addLogLocations(std::vector> && indices_with_log_locations) { + /// if we have unlimited space in latest logs cache we don't need log location + if (latest_logs_cache.size_threshold == 0) + return; + std::lock_guard lock(logs_location_mutex); unapplied_indices_with_log_locations.insert( unapplied_indices_with_log_locations.end(), @@ -1291,7 +1294,8 @@ void LogEntryStorage::addLogLocations(std::vector new_unapplied_indices_with_log_locations; @@ -1343,11 +1347,11 @@ LogEntriesPtr LogEntryStorage::getLogEntriesBetween(uint64_t start, uint64_t end if (!read_info) return; - const auto & [file_description, start_position, count] = *read_info; - LOG_TRACE(log, "Reading from path {} {} entries", file_description->path, count); - file_description->withLock( + LOG_TRACE(log, "Reading from path {} {} entries", read_info->file_description->path, read_info->count); + read_info->file_description->withLock( [&] { + const auto & [file_description, start_position, count] = *read_info; auto file = file_description->disk->readFile(file_description->path); file->seek(start_position, SEEK_SET); diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index d18f6b84283..f8f05afa24f 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -260,7 +260,7 @@ private: struct PrefetchInfo { std::vector file_infos; - std::pair commit_prefetch_index_range; + std::pair commit_prefetch_index_range; std::atomic cancel; std::atomic done = false; }; diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 61ff8c3f16a..332e47c0e62 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1819,11 +1819,9 @@ void testLogAndStateMachine( snapshot_task.create_snapshot(std::move(snapshot_task.snapshot)); } + if (snapshot_created && changelog.size() > settings->reserved_log_items) - { - keeper_context->setLastCommitIndex(i - settings->reserved_log_items); changelog.compact(i - settings->reserved_log_items); - } } SnapshotsQueue snapshots_queue1{1}; From 0c00e58353183811f604ebf2e058e5069a3a38f2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 16 Feb 2024 17:22:22 +0000 Subject: [PATCH 063/145] Fixing build. 
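
A note on the preceding Changelog patch (062): the structured bindings over prefetch_file_info, the logs_location entries and *read_info were moved inside the withLock() lambdas, presumably so that the file description's fields (disk, path, position, count) are only read while its lock is held. Below is a minimal, self-contained sketch of that pattern; FileDescription and its members are made-up stand-ins, not the real Keeper classes.

    #include <iostream>
    #include <mutex>
    #include <string>

    /// Made-up stand-in for a changelog file description guarded by its own mutex.
    struct FileDescription
    {
        std::string path = "changelog_1_100000.bin";
        size_t start_position = 0;

        template <typename TFunction>
        void withLock(TFunction && function) const
        {
            std::lock_guard lock(mutex);
            function();
        }

    private:
        mutable std::mutex mutex;
    };

    int main()
    {
        FileDescription description;

        /// Read the shared fields only while the description's lock is held,
        /// mirroring how the patch moves the structured binding into the lambda.
        description.withLock([&]
        {
            std::cout << "reading " << description.path
                      << " from offset " << description.start_position << '\n';
        });
    }

Binding references to those fields before the lock is taken would allow another thread to modify them mid-read; moving the binding into the lambda keeps every access under the lock.
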
--- src/CMakeLists.txt | 4 ++++ src/Functions/CMakeLists.txt | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 08913ed1b5a..dff70e06ce4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -506,6 +506,10 @@ if (TARGET ch_contrib::s2) dbms_target_link_libraries (PUBLIC ch_contrib::s2) endif() +if (TARGET ch_contrib::vectorscan) + dbms_target_link_libraries (PRIVATE ch_contrib::vectorscan) +endif() + if (TARGET ch_contrib::brotli) target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::brotli) endif() diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index a06e898b7c5..ac3e3671ae0 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -9,6 +9,11 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources FunctionHelpers.cpp extractTimeZoneFromFunctionArguments.cpp FunctionsLogical.cpp + if.cpp + multiIf.cpp + multiMatchAny.cpp + checkHyperscanRegexp.cpp + array/has.cpp CastOverloadResolver.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers From 3323d5ce81914d93026f2cf68b68170cf8c4053b Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 17 Feb 2024 15:02:26 +0100 Subject: [PATCH 064/145] Move threadPoolCallbackRunner to the "Common" folder. --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- src/Backups/BackupIO_S3.cpp | 2 +- src/{Interpreters => Common}/threadPoolCallbackRunner.h | 0 src/Disks/IO/ThreadPoolReader.h | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.h | 2 +- src/Disks/ObjectStorages/IObjectStorage.h | 2 +- src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h | 2 +- src/IO/ParallelReadBuffer.h | 2 +- src/IO/S3/copyS3File.h | 2 +- src/IO/WriteBufferFromS3.h | 2 +- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/MergeTreeMarksLoader.cpp | 2 +- src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp | 2 +- src/Storages/MergeTree/MergeTreeSource.cpp | 2 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageURL.cpp | 2 +- src/Storages/System/StorageSystemDetachedParts.cpp | 2 +- 19 files changed, 18 insertions(+), 18 deletions(-) rename src/{Interpreters => Common}/threadPoolCallbackRunner.h (100%) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 52ce20d5108..b3b92323109 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -2,7 +2,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index fa4c1af3698..f8bbf5b1f79 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -2,7 +2,7 @@ #if USE_AWS_S3 #include -#include +#include #include #include #include diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Common/threadPoolCallbackRunner.h similarity index 100% rename from src/Interpreters/threadPoolCallbackRunner.h rename to src/Common/threadPoolCallbackRunner.h diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index 42bc9bf8bb4..b8aff9f22a2 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index cd2bf223f33..abc251b2b10 100644 --- 
a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 049935ad60c..56c269a3fc5 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include "config.h" diff --git a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h index a6abe03bac9..5f63e5f6e8a 100644 --- a/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h +++ b/src/Disks/ObjectStorages/ObjectStorageIteratorAsync.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 4cc49288af6..5771eb1ebe0 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h index 83814f42693..1433f8d18ba 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.h @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index e76b40f77b7..daac1190399 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace DB diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 607be51ed25..093d26ba7bb 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -5,7 +5,7 @@ #if USE_AWS_S3 #include -#include +#include #include #include #include diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 230f39b074e..5dc269990a1 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4ad440dae00..1de79ed17ca 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 7531c03a011..aeb6afeff11 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 47c2fe07bb4..8d8b0f1cc79 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index a450505f7a8..e1d1d0951e4 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -1,6 +1,6 @@ 
#include #include -#include +#include #include #include diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 73559ef8571..587145cd1a7 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6f3599630d3..608e44c3cd0 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index a9cd5f2610a..3dae43976f7 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include From 8f29320a73c394357b20495433a1ac919f8be9c6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 18 Feb 2024 13:15:24 +0100 Subject: [PATCH 065/145] Flush StorageBuffer into multiple threads if num_layers > 1 --- src/Common/CurrentMetrics.cpp | 3 +++ src/Storages/StorageBuffer.cpp | 12 +++++++++++- src/Storages/StorageBuffer.h | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index c6fbafa8dc3..6931001202d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -262,6 +262,9 @@ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ M(RefreshableViews, "Number materialized views with periodic refreshing (REFRESH)") \ M(RefreshingViews, "Number of materialized views currently executing a refresh") \ + M(StorageBufferFlushThreads, "Number of threads for background flushes in StorageBuffer") \ + M(StorageBufferFlushThreadsActive, "Number of threads for background flushes in StorageBuffer running a task") \ + M(StorageBufferFlushThreadsScheduled, "Number of queued or active threads for background flushes in StorageBuffer") #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index d5c135bb81d..dbf6c7c7657 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -56,6 +56,9 @@ namespace CurrentMetrics { extern const Metric StorageBufferRows; extern const Metric StorageBufferBytes; + extern const Metric StorageBufferFlushThreads; + extern const Metric StorageBufferFlushThreadsActive; + extern const Metric StorageBufferFlushThreadsScheduled; } @@ -131,6 +134,7 @@ StorageBuffer::StorageBuffer( : IStorage(table_id_) , WithContext(context_->getBufferContext()) , num_shards(num_shards_) + , flush_pool(CurrentMetrics::StorageBufferFlushThreads, CurrentMetrics::StorageBufferFlushThreadsActive, CurrentMetrics::StorageBufferFlushThreadsScheduled, num_shards, 0, num_shards_) , buffers(num_shards_) , min_thresholds(min_thresholds_) , max_thresholds(max_thresholds_) @@ -802,7 +806,13 @@ bool StorageBuffer::checkThresholdsImpl(bool direct, size_t rows, size_t bytes, void StorageBuffer::flushAllBuffers(bool check_thresholds) { for (auto & buf : buffers) - flushBuffer(buf, check_thresholds, false); + { + flush_pool.scheduleOrThrowOnError([&] () + { + flushBuffer(buf, check_thresholds, false); + }); + } + flush_pool.wait(); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 
47f6239b173..50f12be5aef 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -149,6 +150,7 @@ private: /// There are `num_shards` of independent buffers. const size_t num_shards; + ThreadPool flush_pool; std::vector buffers; const Thresholds min_thresholds; From 29e3e7cb965b5ada347028282e87005c570d3400 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 18 Feb 2024 13:19:35 +0100 Subject: [PATCH 066/145] Better if only 1 layer --- src/Storages/StorageBuffer.cpp | 22 ++++++++++++++++++---- src/Storages/StorageBuffer.h | 2 +- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index dbf6c7c7657..c2b63101d11 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -134,7 +134,6 @@ StorageBuffer::StorageBuffer( : IStorage(table_id_) , WithContext(context_->getBufferContext()) , num_shards(num_shards_) - , flush_pool(CurrentMetrics::StorageBufferFlushThreads, CurrentMetrics::StorageBufferFlushThreadsActive, CurrentMetrics::StorageBufferFlushThreadsScheduled, num_shards, 0, num_shards_) , buffers(num_shards_) , min_thresholds(min_thresholds_) , max_thresholds(max_thresholds_) @@ -157,6 +156,12 @@ StorageBuffer::StorageBuffer( storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); + if (num_shards > 1) + { + flush_pool = std::make_unique( + CurrentMetrics::StorageBufferFlushThreads, CurrentMetrics::StorageBufferFlushThreadsActive, CurrentMetrics::StorageBufferFlushThreadsScheduled, + num_shards, 0, num_shards); + } flush_handle = bg_pool.createTask(log->name() + "/Bg", [this]{ backgroundFlush(); }); } @@ -807,12 +812,21 @@ void StorageBuffer::flushAllBuffers(bool check_thresholds) { for (auto & buf : buffers) { - flush_pool.scheduleOrThrowOnError([&] () + if (flush_pool) + { + flush_pool->scheduleOrThrowOnError([&] () + { + flushBuffer(buf, check_thresholds, false); + }); + } + else { flushBuffer(buf, check_thresholds, false); - }); + } } - flush_pool.wait(); + + if (flush_pool) + flush_pool->wait(); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 50f12be5aef..6c15c7e0238 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -150,7 +150,7 @@ private: /// There are `num_shards` of independent buffers. 
const size_t num_shards; - ThreadPool flush_pool; + std::unique_ptr flush_pool; std::vector buffers; const Thresholds min_thresholds; From 6b2d89c78394f50c0053551b796cb5d2228c142f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Feb 2024 10:17:51 +0800 Subject: [PATCH 067/145] change as request --- .../0_stateless/02985_if_over_big_int_decimal.reference | 6 ++++++ .../queries/0_stateless/02985_if_over_big_int_decimal.sql | 8 ++++++++ 2 files changed, 14 insertions(+) diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference index 055103ad134..1dfad945ee2 100644 --- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference @@ -4,3 +4,9 @@ 49500 49500 49500 +450000 +450000 +450000 +450000 +450000 +450000 diff --git a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql index 6868524d195..0295a64a092 100644 --- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql +++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql @@ -4,3 +4,11 @@ select sumIf(number::Int256, number % 10 == 0) from numbers(1000); select sumIf(number::UInt256, number % 10 == 0) from numbers(1000); select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000); select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000); + +-- Test when the condition is neither 0 nor 1 +select sumIf(number::Int128, number % 10) from numbers(1000); +select sumIf(number::UInt128, number % 10) from numbers(1000); +select sumIf(number::Int256, number % 10) from numbers(1000); +select sumIf(number::UInt256, number % 10) from numbers(1000); +select sumIf(number::Decimal128(3), number % 10) from numbers(1000); +select sumIf(number::Decimal256(3), number % 10) from numbers(1000); From 6329b6ae459519bc0471d3ce0179ed2bd09dacca Mon Sep 17 00:00:00 2001 From: Hongbin Ma Date: Mon, 19 Feb 2024 14:49:02 +0800 Subject: [PATCH 068/145] fix flaky test case --- tests/queries/0_stateless/00873_t64_codec_date.reference | 2 +- tests/queries/0_stateless/00873_t64_codec_date.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00873_t64_codec_date.reference b/tests/queries/0_stateless/00873_t64_codec_date.reference index 1568c3122e6..9353696610c 100644 --- a/tests/queries/0_stateless/00873_t64_codec_date.reference +++ b/tests/queries/0_stateless/00873_t64_codec_date.reference @@ -1,4 +1,4 @@ 1970-01-01 1970-01-01 1950-01-01 1950-01-01 1970-01-01 1970-01-01 1970-01-01 1970-01-01 -2149-06-06 2149-06-06 2149-06-08 2149-06-08 2149-06-06 2149-06-06 2149-06-06 2149-06-06 +2149-06-06 2149-06-06 2149-06-08 2149-06-08 diff --git a/tests/queries/0_stateless/00873_t64_codec_date.sql b/tests/queries/0_stateless/00873_t64_codec_date.sql index e9230c75665..c6e21baba12 100644 --- a/tests/queries/0_stateless/00873_t64_codec_date.sql +++ b/tests/queries/0_stateless/00873_t64_codec_date.sql @@ -13,7 +13,7 @@ INSERT INTO t64 values ('2149-06-06', '2149-06-06', '2149-06-06', '2149-06-06'); INSERT INTO t64 values ('2149-06-08', '2149-06-08', '2149-06-08', '2149-06-08'); INSERT INTO t64 values ('1950-01-01', '1950-01-01', '1950-01-01', '1950-01-01'); -SELECT * FROM t64 ORDER BY date16; +SELECT * FROM t64 ORDER BY date_32; SELECT * FROM t64 WHERE date16 != t_date16; SELECT * FROM t64 WHERE date_32 != t_date32; From 
b38ad8297c8c62871e75cc82afdd6ed666cff44d Mon Sep 17 00:00:00 2001 From: conicliu Date: Mon, 19 Feb 2024 16:11:42 +0800 Subject: [PATCH 069/145] skip log empty message --- src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 000d36752cb..e31d991ef09 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -701,7 +701,9 @@ void DataPartStorageOnDiskBase::remove( if (file_name.starts_with(proj_dir_name)) files_not_to_remove_for_projection.emplace(fs::path(file_name).filename()); - LOG_DEBUG(log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); + if (!files_not_to_remove_for_projection.empty()) + LOG_DEBUG( + log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); CanRemoveDescription proj_description { From c55204d8efdd8ab9fb19e35fb8d07d2c171e5870 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Feb 2024 09:29:19 +0100 Subject: [PATCH 070/145] Implement TODO --- src/Common/tests/gtest_async_loader.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index 950c7bbab76..fc2537abcfc 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -427,9 +427,7 @@ TEST(AsyncLoader, CancelExecutingTask) } } -// This test is disabled due to `MemorySanitizer: use-of-uninitialized-value` issue in `collectSymbolsFromProgramHeaders` function -// More details: https://github.com/ClickHouse/ClickHouse/pull/48923#issuecomment-1545415482 -TEST(AsyncLoader, DISABLED_JobFailure) +TEST(AsyncLoader, JobFailure) { AsyncLoaderTest t; t.loader.start(); From 6565423b1a3ca7a6127b848fc112e8c2eadb66ae Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 19 Feb 2024 10:32:36 +0100 Subject: [PATCH 071/145] Review fix --- src/Storages/StorageBuffer.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index c2b63101d11..5d717f84a1d 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -814,10 +815,10 @@ void StorageBuffer::flushAllBuffers(bool check_thresholds) { if (flush_pool) { - flush_pool->scheduleOrThrowOnError([&] () + scheduleFromThreadPool([&] () { flushBuffer(buf, check_thresholds, false); - }); + }, *flush_pool, "BufferFlush"); } else { From 0496d0f45fa6d07038a32de93f6f65ed02f0c971 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 18 Feb 2024 10:55:22 +0100 Subject: [PATCH 072/145] Do not retry queries if container is down in integration tests Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 1d96563251b..95722dd0db9 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3484,6 +3484,10 @@ class ClickHouseInstance: if check_callback(result): return result time.sleep(sleep_time) + except QueryRuntimeException as ex: + # Container is down, this is likely due to server crash. 
+ if "No route to host" in str(ex): + raise except Exception as ex: # logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) From cfa198c4773cf05619766020bbe6aee859bb2739 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:34:00 +0100 Subject: [PATCH 073/145] remove comment Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Functions/FunctionBinaryArithmetic.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 9b4249b0aef..d253095ca01 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -2101,7 +2101,6 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A type_res = std::make_shared(left.getPrecision(), left.getScale()); else type_res = std::make_shared(right.getPrecision(), right.getScale()); - // Create result decimal type somehow, maybe similar to how we do it in getReturnTypeImplStatic auto res = executeNumericWithDecimal( left, right, From a5b62df0d6a2202d4a8ccdd766470db1c77e3cc3 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Feb 2024 13:46:51 +0100 Subject: [PATCH 074/145] Update 02982_dont_infer_exponent_floats.sql --- tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql index 17f62557fc2..2a281e898f1 100644 --- a/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql +++ b/tests/queries/0_stateless/02982_dont_infer_exponent_floats.sql @@ -1,3 +1,2 @@ DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 0; DESC format(CSV, '1E20\n1.1E20') settings input_format_try_infer_exponent_floats = 1; - From 7c2654f62e210d76bff31571bcf60cf28a7e5a14 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 19 Feb 2024 13:41:22 +0100 Subject: [PATCH 075/145] Fix data race --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 ++++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 7 ++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 2826c3e23f1..11ede661f78 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -347,7 +347,7 @@ const IMergeTreeDataPart::Index & IMergeTreeDataPart::getIndex() const { std::scoped_lock lock(index_mutex); if (!index_loaded) - loadIndex(lock); + loadIndex(); index_loaded = true; return index; } @@ -569,6 +569,7 @@ void IMergeTreeDataPart::removeIfNeeded() UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const { + std::scoped_lock lock(index_mutex); UInt64 res = 0; for (const ColumnPtr & column : index) res += column->byteSize(); @@ -577,6 +578,7 @@ UInt64 IMergeTreeDataPart::getIndexSizeInBytes() const UInt64 IMergeTreeDataPart::getIndexSizeInAllocatedBytes() const { + std::scoped_lock lock(index_mutex); UInt64 res = 0; for (const ColumnPtr & column : index) res += column->allocatedBytes(); @@ -828,7 +830,7 @@ void IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) co { } -void IMergeTreeDataPart::loadIndex(std::scoped_lock &) const +void IMergeTreeDataPart::loadIndex() const { /// Memory for index must 
not be accounted as memory usage for query, because it belongs to a table. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index c9dea1afcc5..0d7acfab891 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -565,8 +566,8 @@ protected: /// Lazily loaded in RAM. Contains each index_granularity-th value of primary key tuple. /// Note that marks (also correspond to primary key) are not always in RAM, but cached. See MarkCache.h. mutable std::mutex index_mutex; - mutable Index index; - mutable bool index_loaded = false; + mutable Index index TSA_GUARDED_BY(index_mutex); + mutable bool index_loaded TSA_GUARDED_BY(index_mutex) = false; /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk ColumnSize total_columns_size; @@ -664,7 +665,7 @@ private: virtual void appendFilesOfIndexGranularity(Strings & files) const; /// Loads the index file. - void loadIndex(std::scoped_lock &) const; + void loadIndex() const TSA_REQUIRES(index_mutex); void appendFilesOfIndex(Strings & files) const; From 17b1760eca0c9f61276d6a7e5cdba497bfdc48b2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 19 Feb 2024 14:29:00 +0100 Subject: [PATCH 076/145] Add new setting to changes history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 64650bf0ef5..600fe150805 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,7 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.2", { + {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."}, {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"}, {"async_insert_use_adaptive_busy_timeout", true, true, "Use adaptive asynchronous insert timeout"}, From 0dc73bacc52ca64c7a3049dab68fa0bc7a9c9219 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 19 Feb 2024 11:18:04 +0100 Subject: [PATCH 077/145] Fix cleanup --- src/Coordination/Changelog.cpp | 70 ++++++++++++++++++--------- src/Coordination/Changelog.h | 2 + tests/config/config.d/keeper_port.xml | 4 +- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index d743801870a..16f6c36b8a3 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,8 +1,7 @@ -#include #include #include -#include #include +#include #include #include #include @@ -1039,6 +1038,7 @@ void LogEntryStorage::addEntryWithLocation(uint64_t index, const LogEntryPtr & l if (logs_location.size() == 1) min_index_with_location = index; + max_index_with_location = index; if (log_entry->get_val_type() == nuraft::conf) @@ -1073,22 +1073,37 @@ void LogEntryStorage::cleanUpTo(uint64_t index) } min_index_with_location = index; + } } + { + std::lock_guard lock(logs_location_mutex); + if (!unapplied_indices_with_log_locations.empty()) + { + auto last = std::ranges::lower_bound( + unapplied_indices_with_log_locations, + index, + std::ranges::less{}, + [](const auto & index_with_location) { 
return index_with_location.first; }); + + unapplied_indices_with_log_locations.erase(unapplied_indices_with_log_locations.begin(), last); + } + } /// uncommitted logs should be compacted only if we received snapshot from leader if (current_prefetch_info && !current_prefetch_info->done) { auto [prefetch_from, prefetch_to] = current_prefetch_info->commit_prefetch_index_range; /// if we will clean some logs that are currently prefetched, stop prefetching - /// and clean all logs that were being prefetched + /// and clean all logs from it if (index > prefetch_from) { current_prefetch_info->cancel = true; current_prefetch_info->done.wait(false); - commit_logs_cache.cleanUpTo(std::max(prefetch_to + 1, index)); + commit_logs_cache.clear(); } + /// start prefetching logs for committing at the current index /// the last log index in the snapshot should be the /// last log we cleaned up @@ -1148,6 +1163,20 @@ void LogEntryStorage::cleanAfter(uint64_t index) } } + { + std::lock_guard lock(logs_location_mutex); + if (!unapplied_indices_with_log_locations.empty()) + { + auto first = std::ranges::upper_bound( + unapplied_indices_with_log_locations, + index, + std::ranges::less{}, + [](const auto & index_with_location) { return index_with_location.first; }); + + unapplied_indices_with_log_locations.erase(first, unapplied_indices_with_log_locations.end()); + } + } + /// if we cleared all latest logs, there is a possibility we would need to clear commit logs if (latest_logs_cache.empty()) { @@ -1204,24 +1233,24 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const LogEntryPtr entry = nullptr; if (latest_config != nullptr && index == latest_config_index) + return latest_config; + + if (first_log_entry != nullptr && index == first_log_index) + return first_log_entry; + + if (auto entry_from_latest_cache = latest_logs_cache.getEntry(index)) { - entry = latest_config; - } - else if (first_log_entry != nullptr && index == first_log_index) - { - entry = first_log_entry; - } - else if (auto entry_from_latest_cache = latest_logs_cache.getEntry(index)) - { - entry = std::move(entry_from_latest_cache); ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromLatestCache); + return entry_from_latest_cache; } - else if (auto entry_from_commit_cache = commit_logs_cache.getEntry(index)) + + if (auto entry_from_commit_cache = commit_logs_cache.getEntry(index)) { - entry = std::move(entry_from_commit_cache); ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromCommitCache); + return entry_from_commit_cache; } - else if (auto it = logs_location.find(index); it != logs_location.end()) + + if (auto it = logs_location.find(index); it != logs_location.end()) { it->second.file_description->withLock( [&] @@ -1250,6 +1279,7 @@ LogEntryPtr LogEntryStorage::getEntry(uint64_t index) const ProfileEvents::increment(ProfileEvents::KeeperLogsEntryReadFromFile); } + return entry; } @@ -2378,14 +2408,10 @@ void Changelog::getKeeperLogInfo(KeeperLogInfo & log_info) const if (!entry_storage.empty()) { log_info.first_log_idx = getStartIndex(); - auto first_entry = entryAt(log_info.first_log_idx); - chassert(first_entry != nullptr); - log_info.first_log_term = first_entry->get_term(); + log_info.first_log_term = termAt(log_info.first_log_idx); log_info.last_log_idx = max_log_id; - auto last_entry = entryAt(log_info.last_log_idx); - chassert(last_entry != nullptr); - log_info.last_log_term = last_entry->get_term(); + log_info.last_log_term = termAt(log_info.last_log_idx); } entry_storage.getKeeperLogInfo(log_info); 
diff --git a/src/Coordination/Changelog.h b/src/Coordination/Changelog.h index f8f05afa24f..2e8dbe75e90 100644 --- a/src/Coordination/Changelog.h +++ b/src/Coordination/Changelog.h @@ -169,7 +169,9 @@ struct LogEntryStorage void addEntry(uint64_t index, const LogEntryPtr & log_entry); void addEntryWithLocation(uint64_t index, const LogEntryPtr & log_entry, LogLocation log_location); + /// clean all logs up to (but not including) index void cleanUpTo(uint64_t index); + /// clean all logs after (but not including) index void cleanAfter(uint64_t index); bool contains(uint64_t index) const; LogEntryPtr getEntry(uint64_t index) const; diff --git a/tests/config/config.d/keeper_port.xml b/tests/config/config.d/keeper_port.xml index 0487ceed989..2b04d843a3b 100644 --- a/tests/config/config.d/keeper_port.xml +++ b/tests/config/config.d/keeper_port.xml @@ -25,8 +25,8 @@ 1 - 31557632 - 20623360 + 1073741824 + 524288000 From 9e4e44fc711bcc2bbbef42153f562ca5e4e1def9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 19 Feb 2024 14:18:52 +0000 Subject: [PATCH 078/145] Tests: query log records for insert over http --- .../test_insert_exception_over_http/test.py | 79 +++++++++++++++++-- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_insert_exception_over_http/test.py b/tests/integration/test_insert_exception_over_http/test.py index a03d68e0b03..e63f79928bf 100644 --- a/tests/integration/test_insert_exception_over_http/test.py +++ b/tests/integration/test_insert_exception_over_http/test.py @@ -15,17 +15,21 @@ def start_cluster(): cluster.shutdown() -def test_insert_exception_over_http(start_cluster): +@pytest.mark.parametrize("inject_failpoint", [1, 0]) +def test_insert_over_http_exception(start_cluster, inject_failpoint): + instance.query("DROP TABLE IF EXISTS tt SYNC") instance.query( "CREATE TABLE tt (KeyID UInt32) Engine = ReplicatedMergeTree('/test_insert_exception_over_http/tt', 'r1') ORDER BY (KeyID)" ) - instance.query( - "SYSTEM ENABLE FAILPOINT execute_query_calling_empty_set_result_func_on_exception" - ) + if inject_failpoint > 0: + instance.query( + "SYSTEM ENABLE FAILPOINT execute_query_calling_empty_set_result_func_on_exception" + ) + log_comment = f"{inject_failpoint}_02988_66a57d6f-d1cc-4693-8bf4-206848edab87" assert True == instance.http_query_and_get_error( - "insert into tt settings insert_keeper_max_retries=0, insert_keeper_fault_injection_probability=1.0, log_comment='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' values (1), (2), (3), (4), (5)", + f"insert into tt settings insert_keeper_max_retries=0, insert_keeper_fault_injection_probability=1.0, log_comment='{log_comment}' values (1), (2), (3), (4), (5)", method="POST", ).startswith("500 Internal Server Error") @@ -33,12 +37,75 @@ def test_insert_exception_over_http(start_cluster): instance.query("SYSTEM FLUSH LOGS") + assert "1\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday() and type = 'QueryStart'" + ) + assert "1\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday() and type = 'ExceptionWhileProcessing'" + ) + assert "0\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday() and type != 'QueryStart' and type != 'ExceptionWhileProcessing'" + ) + + 
instance.query("DROP TABLE tt SYNC") + + +def test_insert_over_http_invalid_statement(start_cluster): + + http_status = 400 + log_comment = f"{http_status}_02988_66a57d6f-d1cc-4693-8bf4-206848edab87" + assert True == instance.http_query_and_get_error( + f"insert into settings log_comment='{log_comment}' values (1), (2), (3), (4), (5)", + method="POST", + ).startswith(f"{http_status}") + + instance.query("SYSTEM FLUSH LOGS") + + assert f"0\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday()" + ) + +def test_insert_over_http_unknown_table(start_cluster): + + http_status = 404 + log_comment = f"{http_status}_02988_66a57d6f-d1cc-4693-8bf4-206848edab87" + assert True == instance.http_query_and_get_error( + f"insert into unknown_table settings log_comment='{log_comment}' values (1), (2), (3), (4), (5)", + method="POST", + ).startswith(f"{http_status}") + + instance.query("SYSTEM FLUSH LOGS") + + assert f"1\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday() and type = 'ExceptionBeforeStart'" + ) + assert f"0\n" == instance.query( + f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday() and type != 'ExceptionBeforeStart'" + ) + + +def test_insert_over_http_ok(start_cluster): + instance.query("DROP TABLE IF EXISTS tt SYNC") + instance.query( + "CREATE TABLE tt (KeyID UInt32) Engine = ReplicatedMergeTree('/test_insert_exception_over_http/tt', 'r1') ORDER BY (KeyID)" + ) + + _, error = instance.http_query_and_get_answer_with_error( + "insert into tt settings log_comment='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' values (1), (2), (3), (4), (5)", + method="POST", + ) + assert(error == None) + + assert "5\n" == instance.query("select count() from tt") + + instance.query("SYSTEM FLUSH LOGS") + assert "1\n" == instance.query( "select count() from system.query_log where log_comment ='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' and current_database = currentDatabase() and event_date >= yesterday() and type = 'QueryStart'" ) assert "1\n" == instance.query( - "select count() from system.query_log where log_comment ='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' and current_database = currentDatabase() and event_date >= yesterday() and type = 'ExceptionWhileProcessing'" + "select count() from system.query_log where log_comment ='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' and current_database = currentDatabase() and event_date >= yesterday() and type = 'QueryFinish'" ) instance.query("DROP TABLE tt SYNC") From e77afa9dc521a367130156908b2ed5e1686c3709 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 19 Feb 2024 14:20:39 +0000 Subject: [PATCH 079/145] Rename test --- .../__init__.py | 0 .../test.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/integration/{test_insert_exception_over_http => test_insert_over_http_query_log}/__init__.py (100%) rename tests/integration/{test_insert_exception_over_http => test_insert_over_http_query_log}/test.py (100%) diff --git a/tests/integration/test_insert_exception_over_http/__init__.py b/tests/integration/test_insert_over_http_query_log/__init__.py similarity index 100% rename from tests/integration/test_insert_exception_over_http/__init__.py rename to tests/integration/test_insert_over_http_query_log/__init__.py diff --git 
a/tests/integration/test_insert_exception_over_http/test.py b/tests/integration/test_insert_over_http_query_log/test.py similarity index 100% rename from tests/integration/test_insert_exception_over_http/test.py rename to tests/integration/test_insert_over_http_query_log/test.py From cc223a1c3e8579a3192da8881a501d6531ccf70e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Feb 2024 14:33:53 +0000 Subject: [PATCH 080/145] Automatic style fix --- tests/integration/test_insert_over_http_query_log/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_insert_over_http_query_log/test.py b/tests/integration/test_insert_over_http_query_log/test.py index e63f79928bf..6e862e5ddde 100644 --- a/tests/integration/test_insert_over_http_query_log/test.py +++ b/tests/integration/test_insert_over_http_query_log/test.py @@ -17,7 +17,6 @@ def start_cluster(): @pytest.mark.parametrize("inject_failpoint", [1, 0]) def test_insert_over_http_exception(start_cluster, inject_failpoint): - instance.query("DROP TABLE IF EXISTS tt SYNC") instance.query( "CREATE TABLE tt (KeyID UInt32) Engine = ReplicatedMergeTree('/test_insert_exception_over_http/tt', 'r1') ORDER BY (KeyID)" @@ -51,7 +50,6 @@ def test_insert_over_http_exception(start_cluster, inject_failpoint): def test_insert_over_http_invalid_statement(start_cluster): - http_status = 400 log_comment = f"{http_status}_02988_66a57d6f-d1cc-4693-8bf4-206848edab87" assert True == instance.http_query_and_get_error( @@ -65,8 +63,8 @@ def test_insert_over_http_invalid_statement(start_cluster): f"select count() from system.query_log where log_comment ='{log_comment}' and current_database = currentDatabase() and event_date >= yesterday()" ) -def test_insert_over_http_unknown_table(start_cluster): +def test_insert_over_http_unknown_table(start_cluster): http_status = 404 log_comment = f"{http_status}_02988_66a57d6f-d1cc-4693-8bf4-206848edab87" assert True == instance.http_query_and_get_error( @@ -94,7 +92,7 @@ def test_insert_over_http_ok(start_cluster): "insert into tt settings log_comment='02988_66a57d6f-d1cc-4693-8bf4-206848edab87' values (1), (2), (3), (4), (5)", method="POST", ) - assert(error == None) + assert error == None assert "5\n" == instance.query("select count() from tt") From 8f5a4d92a172d704f02ff01d11c7e4142f41de3f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Feb 2024 15:19:04 +0000 Subject: [PATCH 081/145] Fix style --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 1 - src/Interpreters/TreeRewriter.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 8b42135ecec..852c1d4d8c7 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -122,7 +122,6 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; extern const int SYNTAX_ERROR; - extern const int UNEXPECTED_EXPRESSION; extern const int INVALID_IDENTIFIER; } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 14fbc9ebebb..e442e47faf4 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -73,7 +73,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int UNKNOWN_IDENTIFIER; - extern const int UNEXPECTED_EXPRESSION; } namespace From 175a1db7876d4a7ff115a56c7018a386812b5a87 Mon Sep 17 00:00:00 
2001 From: Antonio Andelic Date: Mon, 19 Feb 2024 16:11:29 +0100 Subject: [PATCH 082/145] Support specifying users for s3 settings --- src/Backups/BackupIO_S3.cpp | 4 +- src/IO/S3Common.cpp | 21 +++++++- src/IO/S3Common.h | 5 ++ src/Storages/StorageS3.cpp | 2 +- src/Storages/StorageS3Settings.cpp | 5 +- src/Storages/StorageS3Settings.h | 2 +- .../configs/s3_settings.xml | 7 +++ .../test_backup_restore_s3/test.py | 54 +++++++++++++++++++ 8 files changed, 93 insertions(+), 7 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index fa4c1af3698..174b5cfc27c 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -127,7 +127,7 @@ BackupReaderS3::BackupReaderS3( : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); @@ -217,7 +217,7 @@ BackupWriterS3::BackupWriterS3( : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5039059f522..56e3e0df21b 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -1,7 +1,9 @@ #include #include +#include #include + #include "config.h" #if USE_AWS_S3 @@ -124,6 +126,15 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const HTTPHeaderEntries headers = getHTTPHeaders(config_elem, config); ServerSideEncryptionKMSConfig sse_kms_config = getSSEKMSConfig(config_elem, config); + std::unordered_set users; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_elem, keys); + for (const auto & key : keys) + { + if (startsWith(key, "user")) + users.insert(config.getString(config_elem + "." 
+ key)); + } + return AuthSettings { std::move(access_key_id), std::move(secret_access_key), std::move(session_token), @@ -134,10 +145,16 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const use_environment_credentials, use_insecure_imds_request, expiration_window_seconds, - no_sign_request + no_sign_request, + std::move(users) }; } +bool AuthSettings::canBeUsedByUser(const String & user) const +{ + return users.empty() || users.contains(user); +} + bool AuthSettings::hasUpdates(const AuthSettings & other) const { AuthSettings copy = *this; @@ -173,6 +190,8 @@ void AuthSettings::updateFrom(const AuthSettings & from) if (from.no_sign_request.has_value()) no_sign_request = from.no_sign_request; + + users.insert(from.users.begin(), from.users.end()); } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 6ee8d96ed09..b3e01bd6132 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -6,6 +6,7 @@ #include #include +#include #include "config.h" @@ -92,9 +93,13 @@ struct AuthSettings std::optional expiration_window_seconds; std::optional no_sign_request; + std::unordered_set users; + bool hasUpdates(const AuthSettings & other) const; void updateFrom(const AuthSettings & from); + bool canBeUsedByUser(const String & user) const; + private: bool operator==(const AuthSettings & other) const = default; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e29fdd0d4a0..2d8ef3df1c8 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1385,7 +1385,7 @@ const StorageS3::Configuration & StorageS3::getConfiguration() bool StorageS3::Configuration::update(const ContextPtr & context) { - auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString()); + auto s3_settings = context->getStorageS3Settings().getSettings(url.uri.toString(), context->getUserName()); request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context->getSettings()); diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index b0c1160429a..2a0d15a2bab 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -293,7 +293,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint) const +S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); @@ -302,7 +302,8 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const for (auto possible_prefix_setting = next_prefix_setting; possible_prefix_setting != s3_settings.begin();) { std::advance(possible_prefix_setting, -1); - if (boost::algorithm::starts_with(endpoint, possible_prefix_setting->first)) + const auto & [endpoint_prefix, settings] = *possible_prefix_setting; + if (boost::algorithm::starts_with(endpoint, endpoint_prefix) && settings.auth_settings.canBeUsedByUser(user)) return possible_prefix_setting->second; } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 0e152bb2d31..21b6264717e 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint) const; + S3Settings 
getSettings(const String & endpoint, const String & user) const; private: mutable std::mutex mutex; diff --git a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml index 981cf67bbe9..adeb61cbe07 100644 --- a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml +++ b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml @@ -10,6 +10,13 @@ 3 2 + + http://minio1:9001/root/data/backups/limited/ + minio + minio123 + superuser1 + superuser2 + 1 1 diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index cd8f70b3239..783cf1feade 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -452,3 +452,57 @@ def test_backup_to_zip(): backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/{backup_name}.zip', 'minio', 'minio123')" check_backup_and_restore(storage_policy, backup_destination) + + +def test_user_specific_auth(start_cluster): + def create_user(user): + node.query(f"CREATE USER {user}") + node.query(f"GRANT CURRENT GRANTS ON *.* TO {user}") + + create_user("superuser1") + create_user("superuser2") + create_user("regularuser") + + node.query("CREATE TABLE specific_auth (col UInt64) ENGINE=Memory") + + assert "Access Denied" in node.query_and_get_error( + "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')" + ) + assert "Access Denied" in node.query_and_get_error( + "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + user="regularuser", + ) + + node.query( + "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + user="superuser1", + ) + node.query( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + user="superuser1", + ) + + node.query( + "BACKUP TABLE specific_auth TO S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", + user="superuser2", + ) + node.query( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup2.zip')", + user="superuser2", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth FROM S3('http://minio1:9001/root/data/backups/limited/backup1.zip')", + user="regularuser", + ) + + assert "HTTP response code: 403" in node.query_and_get_error( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + user="regularuser", + ) + node.query( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", + user="superuser1", + ) + + node.query("DROP TABLE IF EXISTS test.specific_auth") From 793ae52bf886372579e2ae6af9a73eb0bbe25f99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Feb 2024 17:30:00 +0100 Subject: [PATCH 083/145] Fix cosineDistance crash with Nullable --- .../en/sql-reference/functions/distance-functions.md | 2 +- src/Functions/vectorFunctions.cpp | 12 ++++++------ .../02994_cosineDistanceNullable.reference | 11 +++++++++++ .../0_stateless/02994_cosineDistanceNullable.sql | 3 +++ 4 files changed, 21 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02994_cosineDistanceNullable.reference create mode 100644 tests/queries/0_stateless/02994_cosineDistanceNullable.sql diff --git 
a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 1774c22014d..e20c35c6b6f 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -509,7 +509,7 @@ Result: ## cosineDistance -Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The less the returned value is, the more similar are the vectors. +Calculates the cosine distance between two vectors (the values of the tuples are the coordinates). The smaller the returned value is, the more similar are the vectors. **Syntax** diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 33b0e9f6039..de4a6fb0a5c 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1,9 +1,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -1364,11 +1364,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - if (getReturnTypeImpl(arguments)->isNullable()) - { - return DataTypeNullable(std::make_shared()) - .createColumnConstWithDefaultValue(input_rows_count); - } + /// TODO: cosineDistance does not support nullable arguments + /// https://github.com/ClickHouse/ClickHouse/pull/27933#issuecomment-916670286 + auto return_type = getReturnTypeImpl(arguments); + if (return_type->isNullable()) + return return_type->createColumnConstWithDefaultValue(input_rows_count); FunctionDotProduct dot(context); ColumnWithTypeAndName dot_result{dot.executeImpl(arguments, DataTypePtr(), input_rows_count), diff --git a/tests/queries/0_stateless/02994_cosineDistanceNullable.reference b/tests/queries/0_stateless/02994_cosineDistanceNullable.reference new file mode 100644 index 00000000000..e4fe1f97e7e --- /dev/null +++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.reference @@ -0,0 +1,11 @@ +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/02994_cosineDistanceNullable.sql b/tests/queries/0_stateless/02994_cosineDistanceNullable.sql new file mode 100644 index 00000000000..a62216982f3 --- /dev/null +++ b/tests/queries/0_stateless/02994_cosineDistanceNullable.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/59596 +SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1)); +SELECT cosineDistance((1, 1), (toNullable(0.5), 0.1)) from numbers(10); From 9361946d151a082ca190b9d7489804b9c30ef3b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Feb 2024 17:48:53 +0100 Subject: [PATCH 084/145] Fix build in master --- src/Storages/StorageBuffer.cpp | 58 +++++++++++++++++----------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 5d717f84a1d..2925038ec8e 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1,41 +1,41 @@ -#include -#include #include +#include +#include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace ProfileEvents From f2091ac6cf90bb87b1d6370bc6cd0b4d4c0daa29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Feb 2024 19:33:14 +0000 Subject: [PATCH 085/145] Mini cleanup of CPUID.h --- src/Common/CPUID.h | 490 ++++++++++++++++++++++----------------------- 1 file changed, 243 insertions(+), 247 deletions(-) diff --git a/src/Common/CPUID.h b/src/Common/CPUID.h index b47e7e808d7..d7a714ec5af 100644 --- a/src/Common/CPUID.h +++ b/src/Common/CPUID.h @@ -57,6 +57,249 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT #endif } +union CPUInfo +{ + UInt32 info[4]; + + struct Registers + { + UInt32 eax; + UInt32 ebx; + UInt32 ecx; + UInt32 edx; + } registers; + + inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } + + inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } +}; + +inline bool haveRDTSCP() noexcept +{ + return (CPUInfo(0x80000001).registers.edx >> 27) & 1u; +} + +inline bool haveSSE() noexcept +{ + return (CPUInfo(0x1).registers.edx >> 25) & 1u; +} + +inline bool haveSSE2() noexcept +{ + return (CPUInfo(0x1).registers.edx >> 26) & 1u; +} + +inline bool haveSSE3() noexcept +{ + return CPUInfo(0x1).registers.ecx & 1u; +} + +inline bool havePCLMUL() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 1) & 1u; +} + +inline bool haveSSSE3() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 9) & 1u; +} + +inline bool haveSSE41() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 19) & 1u; +} + +inline bool haveSSE42() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 20) & 1u; +} + +inline bool haveF16C() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 29) & 1u; +} + +inline bool havePOPCNT() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 23) & 1u; +} + +inline bool haveAES() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 25) & 1u; +} + +inline bool haveXSAVE() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 26) & 1u; +} + +inline bool haveOSXSAVE() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 27) & 1u; +} + +inline bool haveAVX() noexcept +{ +#if defined(__x86_64__) + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf + // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 + return haveOSXSAVE() // implies haveXSAVE() + && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS + && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit +#else + return false; +#endif +} + +inline bool haveFMA() noexcept +{ + return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u); +} + +inline bool haveAVX2() noexcept +{ + return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u); +} + +inline bool haveBMI1() noexcept +{ + return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u; +} + +inline bool haveBMI2() noexcept +{ + return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u; +} + +inline bool haveAVX512F() noexcept +{ +#if defined(__x86_64__) + // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support + return haveOSXSAVE() // implies haveXSAVE() + && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS + && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS + && CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present + && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit +#else + 
return false; +#endif +} + +inline bool haveAVX512DQ() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u); +} + +inline bool haveRDSEED() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u); +} + +inline bool haveADX() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u); +} + +inline bool haveAVX512IFMA() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u); +} + +inline bool havePCOMMIT() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u); +} + +inline bool haveCLFLUSHOPT() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u); +} + +inline bool haveCLWB() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u); +} + +inline bool haveAVX512PF() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u); +} + +inline bool haveAVX512ER() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u); +} + +inline bool haveAVX512CD() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u); +} + +inline bool haveSHA() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u); +} + +inline bool haveAVX512BW() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u); +} + +inline bool haveAVX512VL() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u); +} + +inline bool havePREFETCHWT1() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u); +} + +inline bool haveAVX512VBMI() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u); +} + +inline bool haveAVX512VBMI2() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u); +} + +inline bool haveRDRAND() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u); +} + +inline bool haveAMX() noexcept +{ +#if defined(__x86_64__) + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf + return haveOSXSAVE() // implies haveXSAVE() + && ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS +#else + return false; +#endif +} + +inline bool haveAMXBF16() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit +} + +inline bool haveAMXTILE() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit +} + +inline bool haveAMXINT8() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit +} + #define CPU_ID_ENUMERATE(OP) \ OP(SSE) \ OP(SSE2) \ @@ -98,253 +341,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(AMXTILE) \ OP(AMXINT8) -union CPUInfo -{ - UInt32 info[4]; - - struct Registers - { - UInt32 eax; - UInt32 ebx; - UInt32 ecx; - UInt32 edx; - } registers; - - inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } - - inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } -}; - -#define DEF_NAME(X) inline bool have##X() noexcept; - CPU_ID_ENUMERATE(DEF_NAME) -#undef DEF_NAME - -bool haveRDTSCP() noexcept -{ - return (CPUInfo(0x80000001).registers.edx >> 27) & 1u; -} - -bool haveSSE() 
noexcept -{ - return (CPUInfo(0x1).registers.edx >> 25) & 1u; -} - -bool haveSSE2() noexcept -{ - return (CPUInfo(0x1).registers.edx >> 26) & 1u; -} - -bool haveSSE3() noexcept -{ - return CPUInfo(0x1).registers.ecx & 1u; -} - -bool havePCLMUL() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 1) & 1u; -} - -bool haveSSSE3() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 9) & 1u; -} - -bool haveSSE41() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 19) & 1u; -} - -bool haveSSE42() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 20) & 1u; -} - -bool haveF16C() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 29) & 1u; -} - -bool havePOPCNT() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 23) & 1u; -} - -bool haveAES() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 25) & 1u; -} - -bool haveXSAVE() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 26) & 1u; -} - -bool haveOSXSAVE() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 27) & 1u; -} - -bool haveAVX() noexcept -{ -#if defined(__x86_64__) - // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf - // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 - return haveOSXSAVE() // implies haveXSAVE() - && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS - && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit -#else - return false; -#endif -} - -bool haveFMA() noexcept -{ - return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u); -} - -bool haveAVX2() noexcept -{ - return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u); -} - -bool haveBMI1() noexcept -{ - return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u; -} - -bool haveBMI2() noexcept -{ - return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u; -} - -bool haveAVX512F() noexcept -{ -#if defined(__x86_64__) - // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support - return haveOSXSAVE() // implies haveXSAVE() - && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS - && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS - && CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present - && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit -#else - return false; -#endif -} - -bool haveAVX512DQ() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u); -} - -bool haveRDSEED() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u); -} - -bool haveADX() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u); -} - -bool haveAVX512IFMA() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u); -} - -bool havePCOMMIT() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u); -} - -bool haveCLFLUSHOPT() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u); -} - -bool haveCLWB() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u); -} - -bool haveAVX512PF() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u); -} - -bool haveAVX512ER() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u); -} - -bool haveAVX512CD() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u); -} - -bool haveSHA() noexcept -{ - return 
CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u); -} - -bool haveAVX512BW() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u); -} - -bool haveAVX512VL() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u); -} - -bool havePREFETCHWT1() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u); -} - -bool haveAVX512VBMI() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u); -} - -bool haveAVX512VBMI2() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u); -} - -bool haveRDRAND() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u); -} - -inline bool haveAMX() noexcept -{ -#if defined(__x86_64__) - // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf - return haveOSXSAVE() // implies haveXSAVE() - && ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS -#else - return false; -#endif -} - -bool haveAMXBF16() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit -} - -bool haveAMXTILE() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit -} - -bool haveAMXINT8() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit -} - struct CPUFlagsCache { #define DEF_NAME(X) static inline bool have_##X = have##X(); From 4ad485a76934c0bd7167f0b9a2cb2dadd9f8056c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Feb 2024 20:51:37 +0100 Subject: [PATCH 086/145] Add setting history check to stateless tests --- .../0_stateless/02995_baseline_23_12_1.tsv | 940 ++++++++++++++++++ .../02995_new_settings_history.reference | 0 .../0_stateless/02995_new_settings_history.sh | 46 + 3 files changed, 986 insertions(+) create mode 100644 tests/queries/0_stateless/02995_baseline_23_12_1.tsv create mode 100644 tests/queries/0_stateless/02995_new_settings_history.reference create mode 100755 tests/queries/0_stateless/02995_new_settings_history.sh diff --git a/tests/queries/0_stateless/02995_baseline_23_12_1.tsv b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv new file mode 100644 index 00000000000..4c0c9125b46 --- /dev/null +++ b/tests/queries/0_stateless/02995_baseline_23_12_1.tsv @@ -0,0 +1,940 @@ +add_http_cors_header 0 +additional_result_filter +additional_table_filters {} +aggregate_functions_null_for_empty 0 +aggregation_in_order_max_block_bytes 50000000 +aggregation_memory_efficient_merge_threads 0 +allow_aggregate_partitions_independently 0 +allow_asynchronous_read_from_io_pool_for_merge_tree 0 +allow_changing_replica_until_first_data_packet 0 +allow_create_index_without_type 0 +allow_custom_error_code_in_throwif 0 +allow_ddl 1 +allow_deprecated_database_ordinary 0 +allow_deprecated_syntax_for_merge_tree 0 +allow_distributed_ddl 1 +allow_drop_detached 0 +allow_execute_multiif_columnar 1 +allow_experimental_alter_materialized_view_structure 1 +allow_experimental_analyzer 0 +allow_experimental_annoy_index 0 +allow_experimental_bigint_types 1 +allow_experimental_codecs 0 +allow_experimental_database_atomic 1 +allow_experimental_database_materialized_mysql 0 +allow_experimental_database_materialized_postgresql 0 +allow_experimental_database_replicated 0 +allow_experimental_funnel_functions 0 +allow_experimental_geo_types 1 
+allow_experimental_hash_functions 0 +allow_experimental_inverted_index 0 +allow_experimental_lightweight_delete 1 +allow_experimental_live_view 0 +allow_experimental_map_type 1 +allow_experimental_materialized_postgresql_table 0 +allow_experimental_nlp_functions 0 +allow_experimental_object_type 0 +allow_experimental_parallel_reading_from_replicas 0 +allow_experimental_projection_optimization 1 +allow_experimental_query_cache 1 +allow_experimental_query_deduplication 0 +allow_experimental_refreshable_materialized_view 0 +allow_experimental_s3queue 1 +allow_experimental_shared_merge_tree 0 +allow_experimental_statistic 0 +allow_experimental_undrop_table_query 1 +allow_experimental_usearch_index 0 +allow_experimental_window_functions 1 +allow_experimental_window_view 0 +allow_hyperscan 1 +allow_introspection_functions 0 +allow_named_collection_override_by_default 1 +allow_non_metadata_alters 1 +allow_nonconst_timezone_arguments 0 +allow_nondeterministic_mutations 0 +allow_nondeterministic_optimize_skip_unused_shards 0 +allow_prefetched_read_pool_for_local_filesystem 0 +allow_prefetched_read_pool_for_remote_filesystem 1 +allow_push_predicate_when_subquery_contains_with 1 +allow_settings_after_format_in_insert 0 +allow_simdjson 1 +allow_statistic_optimize 0 +allow_suspicious_codecs 0 +allow_suspicious_fixed_string_types 0 +allow_suspicious_indices 0 +allow_suspicious_low_cardinality_types 0 +allow_suspicious_ttl_expressions 0 +allow_unrestricted_reads_from_keeper 0 +alter_move_to_space_execute_async 0 +alter_partition_verbose_result 0 +alter_sync 1 +analyze_index_with_space_filling_curves 1 +annoy_index_search_k_nodes -1 +any_join_distinct_right_table_keys 0 +apply_deleted_mask 1 +apply_mutations_on_fly 0 +asterisk_include_alias_columns 0 +asterisk_include_materialized_columns 0 +async_insert 0 +async_insert_busy_timeout_ms 200 +async_insert_cleanup_timeout_ms 1000 +async_insert_deduplicate 0 +async_insert_max_data_size 1000000 +async_insert_max_query_number 450 +async_insert_stale_timeout_ms 0 +async_insert_threads 16 +async_query_sending_for_remote 1 +async_socket_for_remote 1 +azure_create_new_file_on_insert 0 +azure_list_object_keys_size 1000 +azure_max_single_part_upload_size 104857600 +azure_max_single_read_retries 4 +azure_truncate_on_insert 0 +background_buffer_flush_schedule_pool_size 16 +background_common_pool_size 8 +background_distributed_schedule_pool_size 16 +background_fetches_pool_size 8 +background_merges_mutations_concurrency_ratio 2 +background_message_broker_schedule_pool_size 16 +background_move_pool_size 8 +background_pool_size 16 +background_schedule_pool_size 128 +backup_restore_batch_size_for_keeper_multi 1000 +backup_restore_batch_size_for_keeper_multiread 10000 +backup_restore_keeper_fault_injection_probability 0 +backup_restore_keeper_fault_injection_seed 0 +backup_restore_keeper_max_retries 20 +backup_restore_keeper_retry_initial_backoff_ms 100 +backup_restore_keeper_retry_max_backoff_ms 5000 +backup_restore_keeper_value_max_size 1048576 +backup_threads 16 +bool_false_representation false +bool_true_representation true +cache_warmer_threads 4 +calculate_text_stack_trace 1 +cancel_http_readonly_queries_on_client_close 0 +cast_ipv4_ipv6_default_on_conversion_error 0 +cast_keep_nullable 0 +check_query_single_value_result 1 +check_referential_table_dependencies 0 +check_table_dependencies 1 +checksum_on_read 1 +cloud_mode 0 +cloud_mode_engine 1 +cluster_for_parallel_replicas +collect_hash_table_stats_during_aggregation 1 +column_names_for_schema_inference 
+compatibility +compatibility_ignore_auto_increment_in_create_table 0 +compatibility_ignore_collation_in_create_table 1 +compile_aggregate_expressions 1 +compile_expressions 0 +compile_sort_description 1 +connect_timeout 10 +connect_timeout_with_failover_ms 1000 +connect_timeout_with_failover_secure_ms 1000 +connection_pool_max_wait_ms 0 +connections_with_failover_max_tries 3 +convert_query_to_cnf 0 +count_distinct_implementation uniqExact +count_distinct_optimization 0 +create_index_ignore_unique 0 +create_replicated_merge_tree_fault_injection_probability 0 +create_table_empty_primary_key_by_default 0 +cross_to_inner_join_rewrite 1 +data_type_default_nullable 0 +database_atomic_wait_for_drop_and_detach_synchronously 0 +database_replicated_allow_only_replicated_engine 0 +database_replicated_allow_replicated_engine_arguments 1 +database_replicated_always_detach_permanently 0 +database_replicated_ddl_output 1 +database_replicated_enforce_synchronous_settings 0 +database_replicated_initial_query_timeout_sec 300 +date_time_input_format basic +date_time_output_format simple +date_time_overflow_behavior ignore +decimal_check_overflow 1 +deduplicate_blocks_in_dependent_materialized_views 0 +default_database_engine Atomic +default_max_bytes_in_join 1000000000 +default_table_engine None +default_temporary_table_engine Memory +describe_compact_output 0 +describe_extend_object_types 0 +describe_include_subcolumns 0 +describe_include_virtual_columns 0 +dialect clickhouse +dictionary_use_async_executor 0 +distinct_overflow_mode throw +distributed_aggregation_memory_efficient 1 +distributed_background_insert_batch 0 +distributed_background_insert_max_sleep_time_ms 30000 +distributed_background_insert_sleep_time_ms 100 +distributed_background_insert_split_batch_on_failure 0 +distributed_background_insert_timeout 0 +distributed_connections_pool_size 1024 +distributed_ddl_entry_format_version 5 +distributed_ddl_output_mode throw +distributed_ddl_task_timeout 180 +distributed_directory_monitor_batch_inserts 0 +distributed_directory_monitor_max_sleep_time_ms 30000 +distributed_directory_monitor_sleep_time_ms 100 +distributed_directory_monitor_split_batch_on_failure 0 +distributed_foreground_insert 0 +distributed_group_by_no_merge 0 +distributed_product_mode deny +distributed_push_down_limit 1 +distributed_replica_error_cap 1000 +distributed_replica_error_half_life 60 +distributed_replica_max_ignored_errors 0 +do_not_merge_across_partitions_select_final 0 +drain_timeout 3 +empty_result_for_aggregation_by_constant_keys_on_empty_set 1 +empty_result_for_aggregation_by_empty_set 0 +enable_debug_queries 0 +enable_deflate_qpl_codec 0 +enable_early_constant_folding 1 +enable_extended_results_for_datetime_functions 0 +enable_filesystem_cache 1 +enable_filesystem_cache_log 0 +enable_filesystem_cache_on_write_operations 0 +enable_filesystem_read_prefetches_log 0 +enable_global_with_statement 1 +enable_http_compression 0 +enable_job_stack_trace 0 +enable_lightweight_delete 1 +enable_memory_bound_merging_of_aggregation_results 1 +enable_multiple_prewhere_read_steps 1 +enable_optimize_predicate_expression 1 +enable_optimize_predicate_expression_to_final_subquery 1 +enable_order_by_all 1 +enable_positional_arguments 1 +enable_reads_from_query_cache 1 +enable_s3_requests_logging 0 +enable_scalar_subquery_optimization 1 +enable_sharing_sets_for_mutations 1 +enable_software_prefetch_in_aggregation 1 +enable_unaligned_array_join 0 +enable_url_encoding 1 +enable_writes_to_query_cache 1 +engine_file_allow_create_multiple_files 0 
+engine_file_empty_if_not_exists 0 +engine_file_skip_empty_files 0 +engine_file_truncate_on_insert 0 +engine_url_skip_empty_files 0 +errors_output_format CSV +exact_rows_before_limit 0 +except_default_mode ALL +external_storage_connect_timeout_sec 10 +external_storage_max_read_bytes 0 +external_storage_max_read_rows 0 +external_storage_rw_timeout_sec 300 +external_table_functions_use_nulls 1 +external_table_strict_query 0 +extract_kvp_max_pairs_per_row 1000 +extremes 0 +fallback_to_stale_replicas_for_distributed_queries 1 +filesystem_cache_max_download_size 137438953472 +filesystem_cache_segments_batch_size 20 +filesystem_prefetch_max_memory_usage 1073741824 +filesystem_prefetch_min_bytes_for_single_read_task 2097152 +filesystem_prefetch_step_bytes 0 +filesystem_prefetch_step_marks 0 +filesystem_prefetches_limit 200 +final 0 +flatten_nested 1 +force_aggregate_partitions_independently 0 +force_aggregation_in_order 0 +force_data_skipping_indices +force_grouping_standard_compatibility 1 +force_index_by_date 0 +force_optimize_projection 0 +force_optimize_projection_name +force_optimize_skip_unused_shards 0 +force_optimize_skip_unused_shards_nesting 0 +force_primary_key 0 +force_remove_data_recursively_on_drop 0 +format_avro_schema_registry_url +format_binary_max_array_size 1073741824 +format_binary_max_string_size 1073741824 +format_capn_proto_enum_comparising_mode by_values +format_capn_proto_use_autogenerated_schema 1 +format_csv_allow_double_quotes 1 +format_csv_allow_single_quotes 0 +format_csv_delimiter , +format_csv_null_representation \\N +format_custom_escaping_rule Escaped +format_custom_field_delimiter \t +format_custom_result_after_delimiter +format_custom_result_before_delimiter +format_custom_row_after_delimiter \n +format_custom_row_before_delimiter +format_custom_row_between_delimiter +format_display_secrets_in_show_and_select 0 +format_json_object_each_row_column_for_object_name +format_protobuf_use_autogenerated_schema 1 +format_regexp +format_regexp_escaping_rule Raw +format_regexp_skip_unmatched 0 +format_schema +format_template_resultset +format_template_row +format_template_rows_between_delimiter \n +format_tsv_null_representation \\N +formatdatetime_f_prints_single_zero 0 +formatdatetime_format_without_leading_zeros 0 +formatdatetime_parsedatetime_m_is_month_name 1 +fsync_metadata 1 +function_implementation +function_json_value_return_type_allow_complex 0 +function_json_value_return_type_allow_nullable 0 +function_range_max_elements_in_block 500000000 +function_sleep_max_microseconds_per_block 3000000 +glob_expansion_max_elements 1000 +grace_hash_join_initial_buckets 1 +grace_hash_join_max_buckets 1024 +group_by_overflow_mode throw +group_by_two_level_threshold 100000 +group_by_two_level_threshold_bytes 50000000 +group_by_use_nulls 0 +handle_kafka_error_mode default +handshake_timeout_ms 10000 +hdfs_create_new_file_on_insert 0 +hdfs_replication 0 +hdfs_skip_empty_files 0 +hdfs_truncate_on_insert 0 +hedged_connection_timeout_ms 50 +hsts_max_age 0 +http_connection_timeout 1 +http_headers_progress_interval_ms 100 +http_make_head_request 1 +http_max_chunk_size 107374182400 +http_max_field_name_size 131072 +http_max_field_value_size 131072 +http_max_fields 1000000 +http_max_multipart_form_data_size 1073741824 +http_max_request_param_data_size 10485760 +http_max_tries 10 +http_max_uri_size 1048576 +http_native_compression_disable_checksumming_on_decompress 0 +http_receive_timeout 30 +http_response_buffer_size 0 +http_retry_initial_backoff_ms 100 +http_retry_max_backoff_ms 10000 
+http_send_timeout 30 +http_skip_not_found_url_for_globs 1 +http_wait_end_of_query 0 +http_write_exception_in_output_format 1 +http_zlib_compression_level 3 +idle_connection_timeout 3600 +ignore_cold_parts_seconds 0 +ignore_data_skipping_indices +ignore_on_cluster_for_replicated_access_entities_queries 0 +ignore_on_cluster_for_replicated_udf_queries 0 +implicit_transaction 0 +input_format_allow_errors_num 0 +input_format_allow_errors_ratio 0 +input_format_allow_seeks 1 +input_format_arrow_allow_missing_columns 1 +input_format_arrow_case_insensitive_column_matching 0 +input_format_arrow_import_nested 0 +input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_avro_allow_missing_fields 0 +input_format_avro_null_as_default 0 +input_format_bson_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_csv_allow_cr_end_of_line 0 +input_format_csv_allow_variable_number_of_columns 0 +input_format_csv_allow_whitespace_or_tab_as_delimiter 0 +input_format_csv_arrays_as_nested_csv 0 +input_format_csv_detect_header 1 +input_format_csv_empty_as_default 1 +input_format_csv_enum_as_number 0 +input_format_csv_skip_first_lines 0 +input_format_csv_skip_trailing_empty_lines 0 +input_format_csv_trim_whitespaces 1 +input_format_csv_try_infer_numbers_from_strings 0 +input_format_csv_use_best_effort_in_schema_inference 1 +input_format_csv_use_default_on_bad_values 0 +input_format_custom_allow_variable_number_of_columns 0 +input_format_custom_detect_header 1 +input_format_custom_skip_trailing_empty_lines 0 +input_format_defaults_for_omitted_fields 1 +input_format_hive_text_collection_items_delimiter  +input_format_hive_text_fields_delimiter  +input_format_hive_text_map_keys_delimiter  +input_format_import_nested_json 0 +input_format_ipv4_default_on_conversion_error 0 +input_format_ipv6_default_on_conversion_error 0 +input_format_json_compact_allow_variable_number_of_columns 0 +input_format_json_defaults_for_missing_elements_in_named_tuple 1 +input_format_json_ignore_unknown_keys_in_named_tuple 1 +input_format_json_infer_incomplete_types_as_strings 1 +input_format_json_named_tuples_as_objects 1 +input_format_json_read_arrays_as_strings 1 +input_format_json_read_bools_as_numbers 1 +input_format_json_read_numbers_as_strings 1 +input_format_json_read_objects_as_strings 1 +input_format_json_try_infer_named_tuples_from_objects 1 +input_format_json_try_infer_numbers_from_strings 0 +input_format_json_validate_types_from_metadata 1 +input_format_max_bytes_to_read_for_schema_inference 33554432 +input_format_max_rows_to_read_for_schema_inference 25000 +input_format_msgpack_number_of_columns 0 +input_format_mysql_dump_map_column_names 1 +input_format_mysql_dump_table_name +input_format_native_allow_types_conversion 1 +input_format_null_as_default 1 +input_format_orc_allow_missing_columns 1 +input_format_orc_case_insensitive_column_matching 0 +input_format_orc_filter_push_down 1 +input_format_orc_import_nested 0 +input_format_orc_row_batch_size 100000 +input_format_orc_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_orc_use_fast_decoder 1 +input_format_parallel_parsing 1 +input_format_parquet_allow_missing_columns 1 +input_format_parquet_case_insensitive_column_matching 0 +input_format_parquet_filter_push_down 1 +input_format_parquet_import_nested 0 +input_format_parquet_local_file_min_bytes_for_seek 8192 +input_format_parquet_max_block_size 8192 
+input_format_parquet_preserve_order 0 +input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference 0 +input_format_protobuf_flatten_google_wrappers 0 +input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference 0 +input_format_record_errors_file_path +input_format_skip_unknown_fields 1 +input_format_try_infer_dates 1 +input_format_try_infer_datetimes 1 +input_format_try_infer_integers 1 +input_format_tsv_allow_variable_number_of_columns 0 +input_format_tsv_detect_header 1 +input_format_tsv_empty_as_default 0 +input_format_tsv_enum_as_number 0 +input_format_tsv_skip_first_lines 0 +input_format_tsv_skip_trailing_empty_lines 0 +input_format_tsv_use_best_effort_in_schema_inference 1 +input_format_values_accurate_types_of_literals 1 +input_format_values_allow_data_after_semicolon 0 +input_format_values_deduce_templates_of_expressions 1 +input_format_values_interpret_expressions 1 +input_format_with_names_use_header 1 +input_format_with_types_use_header 1 +insert_allow_materialized_columns 0 +insert_deduplicate 1 +insert_deduplication_token +insert_distributed_one_random_shard 0 +insert_distributed_sync 0 +insert_distributed_timeout 0 +insert_keeper_fault_injection_probability 0 +insert_keeper_fault_injection_seed 0 +insert_keeper_max_retries 20 +insert_keeper_retry_initial_backoff_ms 100 +insert_keeper_retry_max_backoff_ms 10000 +insert_null_as_default 1 +insert_quorum 0 +insert_quorum_parallel 1 +insert_quorum_timeout 600000 +insert_shard_id 0 +interactive_delay 100000 +intersect_default_mode ALL +interval_output_format numeric +join_algorithm default +join_any_take_last_row 0 +join_default_strictness ALL +join_on_disk_max_files_to_merge 64 +join_overflow_mode throw +join_use_nulls 0 +joined_subquery_requires_alias 1 +kafka_disable_num_consumers_limit 0 +kafka_max_wait_ms 5000 +keeper_map_strict_mode 0 +legacy_column_name_of_tuple_literal 0 +limit 0 +live_view_heartbeat_interval 15 +load_balancing random +load_balancing_first_offset 0 +load_marks_asynchronously 0 +local_filesystem_read_method pread_threadpool +local_filesystem_read_prefetch 0 +lock_acquire_timeout 120 +log_comment +log_formatted_queries 0 +log_processors_profiles 0 +log_profile_events 1 +log_queries 1 +log_queries_cut_to_length 100000 +log_queries_min_query_duration_ms 0 +log_queries_min_type QUERY_START +log_queries_probability 1 +log_query_settings 1 +log_query_threads 0 +log_query_views 1 +low_cardinality_allow_in_native_format 1 +low_cardinality_max_dictionary_size 8192 +low_cardinality_use_single_dictionary_for_part 0 +materialize_ttl_after_modify 1 +materialized_views_ignore_errors 0 +max_alter_threads \'auto(16)\' +max_analyze_depth 5000 +max_ast_depth 1000 +max_ast_elements 50000 +max_backup_bandwidth 0 +max_block_size 65409 +max_bytes_before_external_group_by 0 +max_bytes_before_external_sort 0 +max_bytes_before_remerge_sort 1000000000 +max_bytes_in_distinct 0 +max_bytes_in_join 0 +max_bytes_in_set 0 +max_bytes_to_read 0 +max_bytes_to_read_leaf 0 +max_bytes_to_sort 0 +max_bytes_to_transfer 0 +max_columns_to_read 0 +max_compress_block_size 1048576 +max_concurrent_queries_for_all_users 0 +max_concurrent_queries_for_user 0 +max_distributed_connections 1024 +max_distributed_depth 5 +max_download_buffer_size 10485760 +max_download_threads 4 +max_entries_for_hash_table_stats 10000 +max_execution_speed 0 +max_execution_speed_bytes 0 +max_execution_time 0 +max_execution_time_leaf 0 +max_expanded_ast_elements 500000 +max_fetch_partition_retries_count 5 +max_final_threads \'auto(16)\' 
+max_http_get_redirects 0 +max_hyperscan_regexp_length 0 +max_hyperscan_regexp_total_length 0 +max_insert_block_size 1048449 +max_insert_delayed_streams_for_parallel_write 0 +max_insert_threads 0 +max_joined_block_size_rows 65409 +max_limit_for_ann_queries 1000000 +max_live_view_insert_blocks_before_refresh 64 +max_local_read_bandwidth 0 +max_local_write_bandwidth 0 +max_memory_usage 0 +max_memory_usage_for_all_queries 0 +max_memory_usage_for_user 0 +max_network_bandwidth 0 +max_network_bandwidth_for_all_users 0 +max_network_bandwidth_for_user 0 +max_network_bytes 0 +max_number_of_partitions_for_independent_aggregation 128 +max_parallel_replicas 1 +max_parser_depth 1000 +max_partition_size_to_drop 50000000000 +max_partitions_per_insert_block 100 +max_partitions_to_read -1 +max_pipeline_depth 0 +max_query_size 262144 +max_read_buffer_size 1048576 +max_read_buffer_size_local_fs 131072 +max_read_buffer_size_remote_fs 0 +max_remote_read_network_bandwidth 0 +max_remote_read_network_bandwidth_for_server 0 +max_remote_write_network_bandwidth 0 +max_remote_write_network_bandwidth_for_server 0 +max_replica_delay_for_distributed_queries 300 +max_replicated_fetches_network_bandwidth_for_server 0 +max_replicated_sends_network_bandwidth_for_server 0 +max_result_bytes 0 +max_result_rows 0 +max_rows_in_distinct 0 +max_rows_in_join 0 +max_rows_in_set 0 +max_rows_in_set_to_optimize_join 100000 +max_rows_to_group_by 0 +max_rows_to_read 0 +max_rows_to_read_leaf 0 +max_rows_to_sort 0 +max_rows_to_transfer 0 +max_sessions_for_user 0 +max_size_to_preallocate_for_aggregation 100000000 +max_streams_for_merge_tree_reading 0 +max_streams_multiplier_for_merge_tables 5 +max_streams_to_max_threads_ratio 1 +max_subquery_depth 100 +max_table_size_to_drop 50000000000 +max_temporary_columns 0 +max_temporary_data_on_disk_size_for_query 0 +max_temporary_data_on_disk_size_for_user 0 +max_temporary_non_const_columns 0 +max_threads \'auto(16)\' +max_threads_for_annoy_index_creation 4 +max_threads_for_indexes 0 +max_untracked_memory 4194304 +memory_overcommit_ratio_denominator 1073741824 +memory_overcommit_ratio_denominator_for_user 1073741824 +memory_profiler_sample_max_allocation_size 0 +memory_profiler_sample_min_allocation_size 0 +memory_profiler_sample_probability 0 +memory_profiler_step 4194304 +memory_tracker_fault_probability 0 +memory_usage_overcommit_max_wait_microseconds 5000000 +merge_tree_clear_old_parts_interval_seconds 1 +merge_tree_clear_old_temporary_directories_interval_seconds 60 +merge_tree_coarse_index_granularity 8 +merge_tree_compact_parts_min_granules_to_multibuffer_read 16 +merge_tree_determine_task_size_by_prewhere_columns 1 +merge_tree_max_bytes_to_use_cache 2013265920 +merge_tree_max_rows_to_use_cache 1048576 +merge_tree_min_bytes_for_concurrent_read 251658240 +merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem 251658240 +merge_tree_min_bytes_for_seek 0 +merge_tree_min_bytes_per_task_for_remote_reading 4194304 +merge_tree_min_rows_for_concurrent_read 163840 +merge_tree_min_rows_for_concurrent_read_for_remote_filesystem 163840 +merge_tree_min_rows_for_seek 0 +merge_tree_use_const_size_tasks_for_remote_reading 1 +metrics_perf_events_enabled 0 +metrics_perf_events_list +min_bytes_to_use_direct_io 0 +min_bytes_to_use_mmap_io 0 +min_chunk_bytes_for_parallel_parsing 10485760 +min_compress_block_size 65536 +min_count_to_compile_aggregate_expression 3 +min_count_to_compile_expression 3 +min_count_to_compile_sort_description 3 +min_execution_speed 0 +min_execution_speed_bytes 0 
+min_free_disk_space_for_temporary_data 0 +min_hit_rate_to_use_consecutive_keys_optimization 0.5 +min_insert_block_size_bytes 268402944 +min_insert_block_size_bytes_for_materialized_views 0 +min_insert_block_size_rows 1048449 +min_insert_block_size_rows_for_materialized_views 0 +move_all_conditions_to_prewhere 1 +move_primary_key_columns_to_end_of_prewhere 1 +multiple_joins_rewriter_version 0 +multiple_joins_try_to_keep_original_names 0 +mutations_execute_nondeterministic_on_initiator 0 +mutations_execute_subqueries_on_initiator 0 +mutations_max_literal_size_to_replace 16384 +mutations_sync 0 +mysql_datatypes_support_level +mysql_map_fixed_string_to_text_in_show_columns 0 +mysql_map_string_to_text_in_show_columns 0 +mysql_max_rows_to_insert 65536 +network_compression_method LZ4 +network_zstd_compression_level 1 +normalize_function_names 1 +number_of_mutations_to_delay 0 +number_of_mutations_to_throw 0 +odbc_bridge_connection_pool_size 16 +odbc_bridge_use_connection_pooling 1 +odbc_max_field_size 0 +offset 0 +opentelemetry_start_trace_probability 0 +opentelemetry_trace_processors 0 +optimize_aggregation_in_order 0 +optimize_aggregators_of_group_by_keys 1 +optimize_append_index 0 +optimize_arithmetic_operations_in_aggregate_functions 1 +optimize_count_from_files 1 +optimize_distinct_in_order 1 +optimize_distributed_group_by_sharding_key 1 +optimize_duplicate_order_by_and_distinct 0 +optimize_functions_to_subcolumns 0 +optimize_fuse_sum_count_avg 0 +optimize_group_by_constant_keys 1 +optimize_group_by_function_keys 1 +optimize_if_chain_to_multiif 0 +optimize_if_transform_strings_to_enum 0 +optimize_injective_functions_inside_uniq 1 +optimize_min_equality_disjunction_chain_length 3 +optimize_min_inequality_conjunction_chain_length 3 +optimize_monotonous_functions_in_order_by 0 +optimize_move_functions_out_of_any 0 +optimize_move_to_prewhere 1 +optimize_move_to_prewhere_if_final 0 +optimize_multiif_to_if 1 +optimize_normalize_count_variants 1 +optimize_on_insert 1 +optimize_or_like_chain 0 +optimize_read_in_order 1 +optimize_read_in_window_order 1 +optimize_redundant_functions_in_order_by 1 +optimize_respect_aliases 1 +optimize_rewrite_aggregate_function_with_if 1 +optimize_rewrite_array_exists_to_has 0 +optimize_rewrite_sum_if_to_count_if 0 +optimize_skip_merged_partitions 0 +optimize_skip_unused_shards 0 +optimize_skip_unused_shards_limit 1000 +optimize_skip_unused_shards_nesting 0 +optimize_skip_unused_shards_rewrite_in 1 +optimize_sorting_by_input_stream_properties 1 +optimize_substitute_columns 0 +optimize_syntax_fuse_functions 0 +optimize_throw_if_noop 0 +optimize_trivial_approximate_count_query 0 +optimize_trivial_count_query 1 +optimize_trivial_insert_select 1 +optimize_uniq_to_count 1 +optimize_use_implicit_projections 1 +optimize_use_projections 1 +optimize_using_constraints 0 +os_thread_priority 0 +output_format_arrow_compression_method lz4_frame +output_format_arrow_fixed_string_as_fixed_byte_array 1 +output_format_arrow_low_cardinality_as_dictionary 0 +output_format_arrow_string_as_string 0 +output_format_avro_codec +output_format_avro_rows_in_file 1 +output_format_avro_string_column_pattern +output_format_avro_sync_interval 16384 +output_format_bson_string_as_string 0 +output_format_csv_crlf_end_of_line 0 +output_format_decimal_trailing_zeros 0 +output_format_enable_streaming 0 +output_format_json_array_of_rows 0 +output_format_json_escape_forward_slashes 1 +output_format_json_named_tuples_as_objects 1 +output_format_json_quote_64bit_floats 0 
+output_format_json_quote_64bit_integers 1 +output_format_json_quote_decimals 0 +output_format_json_quote_denormals 0 +output_format_json_skip_null_value_in_named_tuples 0 +output_format_json_validate_utf8 0 +output_format_markdown_escape_special_characters 0 +output_format_msgpack_uuid_representation ext +output_format_orc_compression_method lz4 +output_format_orc_row_index_stride 10000 +output_format_orc_string_as_string 0 +output_format_parallel_formatting 1 +output_format_parquet_batch_size 1024 +output_format_parquet_compliant_nested_types 1 +output_format_parquet_compression_method lz4 +output_format_parquet_data_page_size 1048576 +output_format_parquet_fixed_string_as_fixed_byte_array 1 +output_format_parquet_parallel_encoding 1 +output_format_parquet_row_group_size 1000000 +output_format_parquet_row_group_size_bytes 536870912 +output_format_parquet_string_as_string 0 +output_format_parquet_use_custom_encoder 0 +output_format_parquet_version 2.latest +output_format_pretty_color 1 +output_format_pretty_grid_charset UTF-8 +output_format_pretty_max_column_pad_width 250 +output_format_pretty_max_rows 10000 +output_format_pretty_max_value_width 10000 +output_format_pretty_row_numbers 0 +output_format_protobuf_nullables_with_google_wrappers 0 +output_format_schema +output_format_sql_insert_include_column_names 1 +output_format_sql_insert_max_batch_size 65409 +output_format_sql_insert_quote_names 1 +output_format_sql_insert_table_name table +output_format_sql_insert_use_replace 0 +output_format_tsv_crlf_end_of_line 0 +output_format_write_statistics 1 +parallel_distributed_insert_select 0 +parallel_replica_offset 0 +parallel_replicas_count 0 +parallel_replicas_custom_key +parallel_replicas_custom_key_filter_type default +parallel_replicas_for_non_replicated_merge_tree 0 +parallel_replicas_min_number_of_granules_to_enable 0 +parallel_replicas_min_number_of_rows_per_replica 0 +parallel_replicas_single_task_marks_count_multiplier 2 +parallel_view_processing 0 +parallelize_output_from_storages 1 +parsedatetime_parse_without_leading_zeros 1 +partial_merge_join_left_table_buffer_bytes 0 +partial_merge_join_optimizations 0 +partial_merge_join_rows_in_right_blocks 65536 +partial_result_on_first_cancel 0 +parts_to_delay_insert 0 +parts_to_throw_insert 0 +periodic_live_view_refresh 60 +poll_interval 10 +postgresql_connection_pool_auto_close_connection 0 +postgresql_connection_pool_size 16 +postgresql_connection_pool_wait_timeout 5000 +precise_float_parsing 0 +prefer_column_name_to_alias 0 +prefer_global_in_and_join 0 +prefer_localhost_replica 1 +prefer_warmed_unmerged_parts_seconds 0 +preferred_block_size_bytes 1000000 +preferred_max_column_in_block_size_bytes 0 +preferred_optimize_projection_name +prefetch_buffer_size 1048576 +print_pretty_type_names 0 +priority 0 +query_cache_compress_entries 1 +query_cache_max_entries 0 +query_cache_max_size_in_bytes 0 +query_cache_min_query_duration 0 +query_cache_min_query_runs 0 +query_cache_nondeterministic_function_handling throw +query_cache_share_between_users 0 +query_cache_squash_partial_results 1 +query_cache_store_results_of_queries_with_nondeterministic_functions 0 +query_cache_ttl 60 +query_plan_aggregation_in_order 1 +query_plan_enable_multithreading_after_window_functions 1 +query_plan_enable_optimizations 1 +query_plan_execute_functions_after_sorting 1 +query_plan_filter_push_down 1 +query_plan_lift_up_array_join 1 +query_plan_lift_up_union 1 +query_plan_max_optimizations_to_apply 10000 +query_plan_merge_expressions 1 
+query_plan_optimize_primary_key 1 +query_plan_optimize_projection 1 +query_plan_push_down_limit 1 +query_plan_read_in_order 1 +query_plan_remove_redundant_distinct 1 +query_plan_remove_redundant_sorting 1 +query_plan_reuse_storage_ordering_for_window_functions 1 +query_plan_split_filter 1 +query_profiler_cpu_time_period_ns 1000000000 +query_profiler_real_time_period_ns 1000000000 +queue_max_wait_ms 0 +rabbitmq_max_wait_ms 5000 +read_backoff_max_throughput 1048576 +read_backoff_min_concurrency 1 +read_backoff_min_events 2 +read_backoff_min_interval_between_events_ms 1000 +read_backoff_min_latency_ms 1000 +read_from_filesystem_cache_if_exists_otherwise_bypass_cache 0 +read_in_order_two_level_merge_threshold 100 +read_overflow_mode throw +read_overflow_mode_leaf throw +read_priority 0 +readonly 0 +receive_data_timeout_ms 2000 +receive_timeout 300 +regexp_dict_allow_hyperscan 1 +regexp_dict_flag_case_insensitive 0 +regexp_dict_flag_dotall 0 +regexp_max_matches_per_row 1000 +reject_expensive_hyperscan_regexps 1 +remerge_sort_lowered_memory_bytes_ratio 2 +remote_filesystem_read_method threadpool +remote_filesystem_read_prefetch 1 +remote_fs_read_backoff_max_tries 5 +remote_fs_read_max_backoff_ms 10000 +remote_read_min_bytes_for_seek 4194304 +rename_files_after_processing +replace_running_query 0 +replace_running_query_max_wait_ms 5000 +replication_alter_columns_timeout 60 +replication_alter_partitions_sync 1 +replication_wait_for_inactive_replica_timeout 120 +restore_threads 16 +result_overflow_mode throw +rewrite_count_distinct_if_with_count_distinct_implementation 0 +s3_allow_parallel_part_upload 1 +s3_check_objects_after_upload 0 +s3_create_new_file_on_insert 0 +s3_disable_checksum 0 +s3_http_connection_pool_size 1000 +s3_list_object_keys_size 1000 +s3_max_connections 1024 +s3_max_get_burst 0 +s3_max_get_rps 0 +s3_max_inflight_parts_for_one_file 20 +s3_max_put_burst 0 +s3_max_put_rps 0 +s3_max_redirects 10 +s3_max_single_part_upload_size 33554432 +s3_max_single_read_retries 4 +s3_max_unexpected_write_error_retries 4 +s3_max_upload_part_size 5368709120 +s3_min_upload_part_size 16777216 +s3_request_timeout_ms 30000 +s3_retry_attempts 100 +s3_skip_empty_files 0 +s3_strict_upload_part_size 0 +s3_throw_on_zero_files_match 0 +s3_truncate_on_insert 0 +s3_upload_part_size_multiply_factor 2 +s3_upload_part_size_multiply_parts_count_threshold 500 +s3_use_adaptive_timeouts 1 +s3queue_default_zookeeper_path /clickhouse/s3queue/ +s3queue_enable_logging_to_s3queue_log 0 +schema_inference_cache_require_modification_time_for_url 1 +schema_inference_hints +schema_inference_make_columns_nullable 1 +schema_inference_mode default +schema_inference_use_cache_for_azure 1 +schema_inference_use_cache_for_file 1 +schema_inference_use_cache_for_hdfs 1 +schema_inference_use_cache_for_s3 1 +schema_inference_use_cache_for_url 1 +select_sequential_consistency 0 +send_logs_level fatal +send_logs_source_regexp +send_progress_in_http_headers 0 +send_timeout 300 +session_timezone +set_overflow_mode throw +short_circuit_function_evaluation enable +show_table_uuid_in_table_create_query_if_not_nil 0 +single_join_prefer_left_table 1 +skip_download_if_exceeds_query_cache 1 +skip_unavailable_shards 0 +sleep_after_receiving_query_ms 0 +sleep_in_send_data_ms 0 +sleep_in_send_tables_status_ms 0 +sort_overflow_mode throw +splitby_max_substrings_includes_remaining_string 0 +stop_refreshable_materialized_views_on_startup 0 +storage_file_read_method pread +storage_system_stack_trace_pipe_read_timeout_ms 100 +stream_flush_interval_ms 7500 
+stream_like_engine_allow_direct_select 0 +stream_like_engine_insert_queue +stream_poll_timeout_ms 500 +system_events_show_zero_values 0 +table_function_remote_max_addresses 1000 +tcp_keep_alive_timeout 290 +temporary_files_codec LZ4 +temporary_live_view_timeout 1 +throw_if_no_data_to_insert 1 +throw_on_error_from_cache_on_write_operations 0 +throw_on_max_partitions_per_insert_block 1 +throw_on_unsupported_query_inside_transaction 1 +timeout_before_checking_execution_speed 10 +timeout_overflow_mode throw +timeout_overflow_mode_leaf throw +totals_auto_threshold 0.5 +totals_mode after_having_exclusive +trace_profile_events 0 +transfer_overflow_mode throw +transform_null_in 0 +union_default_mode +unknown_packet_in_send_data 0 +use_cache_for_count_from_files 1 +use_client_time_zone 0 +use_compact_format_in_distributed_parts_names 1 +use_concurrency_control 1 +use_hedged_requests 1 +use_index_for_in_with_subqueries 1 +use_index_for_in_with_subqueries_max_values 0 +use_local_cache_for_remote_storage 1 +use_mysql_types_in_show_columns 0 +use_query_cache 0 +use_skip_indexes 1 +use_skip_indexes_if_final 0 +use_structure_from_insertion_table_in_table_functions 2 +use_uncompressed_cache 0 +use_with_fill_by_sorting_prefix 1 +validate_polygons 1 +wait_changes_become_visible_after_commit_mode wait_unknown +wait_for_async_insert 1 +wait_for_async_insert_timeout 120 +wait_for_window_view_fire_signal_timeout 10 +window_view_clean_interval 60 +window_view_heartbeat_interval 15 +workload default +zstd_window_log_max 0 diff --git a/tests/queries/0_stateless/02995_new_settings_history.reference b/tests/queries/0_stateless/02995_new_settings_history.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh new file mode 100755 index 00000000000..8932e00086b --- /dev/null +++ b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-cpu-aarch64 +# Some settings can be different for builds with sanitizers + +# Note that this is a broad check. 
A per version check is done in the upgrade test +# Baseline generated with 23.12 +# clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_23_12_1.tsv +$CLICKHOUSE_LOCAL --query " + WITH old_settings AS + ( + SELECT * FROM file('${CUR_DIR}/02995_baseline_23_12_1.tsv', 'TSV', 'name String, default String') + ), + new_settings AS + ( + select name, default from system.settings order by name + ) + SELECT * FROM + ( + SELECT 'PLEASE ADD THE NEW SETTING TO SettingsChangesHistory.h: ' || name || ' WAS ADDED', + FROM new_settings + WHERE (name NOT IN ( + SELECT name + FROM old_settings + )) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE splitByChar('.', version())[1] >= '24' + )) + UNION ALL + ( + SELECT 'PLEASE ADD THE SETTING VALUE CHANGE TO SettingsChangesHistory.h: ' || name || ' WAS CHANGED FROM ' || old_settings.default || ' TO ' || new_settings.default, + FROM new_settings + LEFT JOIN old_settings ON new_settings.name = old_settings.name + WHERE (new_settings.default != old_settings.default) AND (name NOT IN ( + SELECT arrayJoin(tupleElement(changes, 'name')) + FROM system.settings_changes + WHERE splitByChar('.', version())[1] >= '24' + )) + ) + ) +" From 89006361c5f69f4de49e828bb0813677d8548c02 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 19 Feb 2024 20:52:36 +0100 Subject: [PATCH 087/145] Fix test --- .../integration/test_storage_rabbitmq/test.py | 42 ++++++++++++++----- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 5298d5d8ce2..4485bbe26df 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -47,6 +47,7 @@ instance3 = cluster.add_instance( "configs/mergetree.xml", ], with_rabbitmq=True, + stay_alive=True, ) # Helpers @@ -3567,6 +3568,20 @@ def test_attach_broken_table(rabbitmq_cluster): def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): table_name = "nack_failed_insert" exchange = f"{table_name}_exchange" + + credentials = pika.PlainCredentials("root", "clickhouse") + parameters = pika.ConnectionParameters( + rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials + ) + connection = pika.BlockingConnection(parameters) + channel = connection.channel() + + channel.exchange_declare(exchange='deadl') + + result = channel.queue_declare(queue='deadq') + queue_name = result.method.queue + channel.queue_bind(exchange='deadl', routing_key='', queue=queue_name) + instance3.query( f""" CREATE TABLE test.{table_name} (key UInt64, value UInt64) @@ -3574,7 +3589,8 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): SETTINGS rabbitmq_host_port = '{rabbitmq_cluster.rabbitmq_host}:5672', rabbitmq_flush_interval_ms=1000, rabbitmq_exchange_name = '{exchange}', - rabbitmq_format = 'JSONEachRow'; + rabbitmq_format = 'JSONEachRow', + rabbitmq_queue_settings_list='x-dead-letter-exchange=deadl'; DROP TABLE IF EXISTS test.view; CREATE TABLE test.view (key UInt64, value UInt64) @@ -3587,29 +3603,32 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): """ ) - credentials = pika.PlainCredentials("root", "clickhouse") - parameters = pika.ConnectionParameters( - rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials - ) - connection = pika.BlockingConnection(parameters) - channel = connection.channel() - num_rows = 25 for i in range(num_rows): 
message = json.dumps({"key": i, "value": i}) + "\n" channel.basic_publish(exchange=exchange, routing_key="", body=message) - connection.close() - instance3.wait_for_log_line( "Failed to push to views. Error: Code: 252. DB::Exception: Too many parts" ) instance3.replace_in_config( "/etc/clickhouse-server/config.d/mergetree.xml", - "parts_to_throw_insert>1", + "parts_to_throw_insert>0", "parts_to_throw_insert>10", ) + instance3.restart_clickhouse() + + count = [0] + def on_consume(channel, method, properties, body): + channel.basic_publish(exchange=exchange, routing_key="", body=body) + count[0] += 1 + if count[0] == num_rows: + channel.stop_consuming() + + channel.basic_consume(queue_name, on_consume) + channel.start_consuming() + attempt = 0 count = 0 while attempt < 100: @@ -3627,3 +3646,4 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): DROP TABLE test.{table_name}; """ ) + connection.close() From 0b2c3a7f0f2d101a203964ea44c4db0dd72e7f2b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Feb 2024 20:01:58 +0000 Subject: [PATCH 088/145] Automatic style fix --- tests/integration/test_storage_rabbitmq/test.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 4485bbe26df..280ce230921 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -3576,11 +3576,11 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): connection = pika.BlockingConnection(parameters) channel = connection.channel() - channel.exchange_declare(exchange='deadl') + channel.exchange_declare(exchange="deadl") - result = channel.queue_declare(queue='deadq') + result = channel.queue_declare(queue="deadq") queue_name = result.method.queue - channel.queue_bind(exchange='deadl', routing_key='', queue=queue_name) + channel.queue_bind(exchange="deadl", routing_key="", queue=queue_name) instance3.query( f""" @@ -3620,6 +3620,7 @@ def test_rabbitmq_nack_failed_insert(rabbitmq_cluster): instance3.restart_clickhouse() count = [0] + def on_consume(channel, method, properties, body): channel.basic_publish(exchange=exchange, routing_key="", body=body) count[0] += 1 From bb5a6dd8d3ce4a8560d4ee1a515c4fb64492535c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 19 Feb 2024 20:44:55 +0000 Subject: [PATCH 089/145] Fix: custom key failover test flakyness slowdown_count used only by hedge connections but it's stored inside connection pool and was used for pool shuffling always. 
So, query execution which used hedged connections could affect connection load balancing for queries w/o hedged connections by updating slowdown_count --- src/Client/ConnectionPoolWithFailover.cpp | 4 ++-- src/Client/ConnectionPoolWithFailover.h | 2 +- src/Client/HedgedConnectionsFactory.cpp | 3 ++- src/Common/PoolWithFailoverBase.h | 20 +++++++++++-------- .../test.py | 16 +++++---------- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index fdc0a11e533..d936d297e78 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -253,13 +253,13 @@ ConnectionPoolWithFailover::tryGetEntry( } std::vector -ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func) +ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func, bool use_slowdown_count) { if (!priority_func) priority_func = makeGetPriorityFunc(settings); UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value; - return Base::getShuffledPools(max_ignored_errors, priority_func); + return Base::getShuffledPools(max_ignored_errors, priority_func, use_slowdown_count); } } diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 7ccdd4787a4..fb60782806f 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -91,7 +91,7 @@ public: using Status = std::vector; Status getStatus() const; - std::vector getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}); + std::vector getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}, bool use_slowdown_count = false); size_t getMaxErrorCup() const { return Base::max_error_cap; } diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 82bacece415..f5b074a0257 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -40,7 +40,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory( , max_parallel_replicas(max_parallel_replicas_) , skip_unavailable_shards(skip_unavailable_shards_) { - shuffled_pools = pool->getShuffledPools(settings_, priority_func); + shuffled_pools = pool->getShuffledPools(settings_, priority_func, /* use_slowdown_count */ true); + for (const auto & shuffled_pool : shuffled_pools) replicas.emplace_back( std::make_unique(shuffled_pool.pool, &timeouts, settings_, log, table_to_check.get())); diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 8fd83300eff..0663fbd1143 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -133,7 +133,7 @@ protected: void updateErrorCounts(PoolStates & states, time_t & last_decrease_time) const; - std::vector getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority); + std::vector getShuffledPools(size_t max_ignored_errors, const GetPriorityFunc & get_priority, bool use_slowdown_count = false); inline void updateSharedErrorCounts(std::vector & shuffled_pools); @@ -160,7 +160,7 @@ protected: template std::vector::ShuffledPool> PoolWithFailoverBase::getShuffledPools( - size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority) + size_t max_ignored_errors, const PoolWithFailoverBase::GetPriorityFunc & get_priority, bool 
use_slowdown_count) { /// Update random numbers and error counts. PoolStates pool_states = updatePoolStates(max_ignored_errors); @@ -175,13 +175,13 @@ PoolWithFailoverBase::getShuffledPools( std::vector shuffled_pools; shuffled_pools.reserve(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) - shuffled_pools.push_back(ShuffledPool{nested_pools[i], &pool_states[i], i, /* error_count = */ 0, /* slowdown_count = */ 0}); + shuffled_pools.emplace_back(ShuffledPool{.pool = nested_pools[i], .state = &pool_states[i], .index = i}); ::sort( shuffled_pools.begin(), shuffled_pools.end(), - [](const ShuffledPool & lhs, const ShuffledPool & rhs) + [use_slowdown_count](const ShuffledPool & lhs, const ShuffledPool & rhs) { - return PoolState::compare(*lhs.state, *rhs.state); + return PoolState::compare(*lhs.state, *rhs.state, use_slowdown_count); }); return shuffled_pools; @@ -344,10 +344,14 @@ struct PoolWithFailoverBase::PoolState random = rng(); } - static bool compare(const PoolState & lhs, const PoolState & rhs) + static bool compare(const PoolState & lhs, const PoolState & rhs, bool use_slowdown_count) { - return std::forward_as_tuple(lhs.error_count, lhs.slowdown_count, lhs.config_priority, lhs.priority, lhs.random) - < std::forward_as_tuple(rhs.error_count, rhs.slowdown_count, rhs.config_priority, rhs.priority, rhs.random); + if (use_slowdown_count) + return std::forward_as_tuple(lhs.error_count, lhs.slowdown_count, lhs.config_priority, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.error_count, rhs.slowdown_count, rhs.config_priority, rhs.priority, rhs.random); + else + return std::forward_as_tuple(lhs.error_count, lhs.config_priority, lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.error_count, rhs.config_priority, rhs.priority, rhs.random); } private: diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index d7e73208798..27e4afb6430 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -108,15 +108,9 @@ def test_parallel_replicas_custom_key_failover( == "subqueries\t4\n" ) - # currently this assert is flaky with asan and tsan builds, disable the assert in such cases for now - # will be investigated separately - if ( - not node1.is_built_with_thread_sanitizer() - and not node1.is_built_with_address_sanitizer() - ): - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" - ) - == "n1\t3\nn3\t2\n" + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" ) + == "n1\t3\nn3\t2\n" + ) From 474efd98a50d20f955fc5b939916cf9077c3a730 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Feb 2024 20:46:06 +0000 Subject: [PATCH 090/145] Docs: Add example how to use "replace" and "remove" attributes in configuration Bug: 8394 --- docs/en/operations/configuration-files.md | 56 +++++++++++++++++++++-- 1 file changed, 51 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 81b25a4e897..615cff6496a 100644 --- 
a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -10,11 +10,57 @@ The ClickHouse server can be configured with configuration files in XML or YAML It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `...` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent. -## Overriding Configuration {#override} +## Merging Configuration {#merging} -The merge of configuration files behaves as one intuitively expects: The contents of both files are combined recursively, children with the same name are replaced by the element of the more specific configuration file. The merge can be customized using attributes `replace` and `remove`. -- Attribute `replace` means that the element is replaced by the specified one. -- Attribute `remove` means that the element is deleted. +Two configuration files (usually the main configuration file and another configuration files from `config.d/`) are merged as follows: + +- If a node (i.e. a path leading to an element) appears in both files and does not have attributes `replace` or `remove`, it is included in the merged configuration file and children from both nodes are included and merged recursively. +- If one of both nodes contains attribute `replace`, it is included in the merged configuration file but only children from the node with attribute `replace` are included. +- If one of both nodes contains attribute `remove`, the node is not included in the merged configuration file (if it exists already, it is deleted). + +```xml + + + 1 + + + 2 + + + 3 + + +``` + +and + +```xml + + + 4 + + + 5 + + + 6 + + +``` + +generates merged configuration file: + +```xml + + + 1 + 4 + + + 5 + + +``` To specify that a value of an element should be replaced by the value of an environment variable, you can use attribute `from_env`. @@ -125,7 +171,7 @@ Users configuration can be split into separate files similar to `config.xml` and Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`. Directory `users.d` is used by default, as `users_config` defaults to `users.xml`. -Note that configuration files are first merged taking into account [Override](#override) settings and includes are processed after that. +Note that configuration files are first [merged](#merging) taking into account settings, and includes are processed after that. 
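As a concrete illustration of the merge rules described above, consider a main configuration file and an override dropped into `config.d/`. This is only a sketch: the element names (`max_connections`, `logger`, `query_log`) are chosen for illustration and are not taken from this patch.

```xml
<!-- config.xml (main configuration); contents are illustrative only -->
<clickhouse>
    <max_connections>1024</max_connections>
    <logger>
        <level>information</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
    </logger>
    <query_log>
        <database>system</database>
        <table>query_log</table>
    </query_log>
</clickhouse>

<!-- config.d/override.xml -->
<clickhouse>
    <max_connections>4096</max_connections>
    <!-- "replace": keep only the children listed here, discarding the main file's logger children -->
    <logger replace="replace">
        <level>warning</level>
    </logger>
    <!-- "remove": drop the query_log node from the merged configuration entirely -->
    <query_log remove="remove"/>
</clickhouse>
```

With these two files, the merged configuration ends up with `max_connections` set to `4096` (same-named nodes are merged recursively and the more specific file wins), a `logger` section containing only `<level>warning</level>` (because of `replace`), and no `query_log` section at all (because of `remove`).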
## XML example {#example} From 6b6630c5a7cc0ed49eb1ef22b139684b3ba22c20 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Feb 2024 23:02:24 +0000 Subject: [PATCH 091/145] Allow casting of bools in string representation to to true bools Fixes #57256 --- src/Interpreters/convertFieldToType.cpp | 4 +++- .../0_stateless/02933_compare_with_bool_as_string.reference | 1 + .../queries/0_stateless/02933_compare_with_bool_as_string.sql | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02933_compare_with_bool_as_string.reference create mode 100755 tests/queries/0_stateless/02933_compare_with_bool_as_string.sql diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index c3b8405659a..346180c3613 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -493,10 +493,12 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { /// Promote data type to avoid overflows. Note that overflows in the largest data type are still possible. /// But don't promote Float32, since we want to keep the exact same value + /// Also don't promote domain types (like bool) because we would otherwise use the serializer of the promoted type (e.g. UInt64 for + /// bool, which does not allow 'true' and 'false' as input values) const IDataType * type_to_parse = &type; DataTypePtr holder; - if (type.canBePromoted() && !which_type.isFloat32()) + if (type.canBePromoted() && !which_type.isFloat32() && !type.getCustomSerialization()) { holder = type.promoteNumericType(); type_to_parse = holder.get(); diff --git a/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference b/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02933_compare_with_bool_as_string.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql b/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql new file mode 100755 index 00000000000..5dbacd5fbbf --- /dev/null +++ b/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql @@ -0,0 +1 @@ +select true = 'true'; From b2285ce6e5c119356120016f6a45cd93636c9ca9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 06:52:07 +0100 Subject: [PATCH 092/145] Remove extensively aggressive check --- src/Interpreters/executeQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 808a2ee2d81..f2aa51bd6de 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -722,7 +722,7 @@ static std::tuple executeQueryImpl( /// TODO: parser should fail early when max_query_size limit is reached. ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); -#ifndef NDEBUG +#if 0 /// Verify that AST formatting is consistent: /// If you format AST, parse it back, and format it again, you get the same string. 
From 9e0f607608027a0182eca9eb257fcd4a0c4ae065 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 16 Feb 2024 18:43:39 +0300 Subject: [PATCH 093/145] Revert "Revert "ReplicatedMergeTree invalid metadata_version fix"" --- src/Storages/StorageReplicatedMergeTree.cpp | 14 ++++--- ...ge_tree_invalid_metadata_version.reference | 14 +++++++ ...ed_merge_tree_invalid_metadata_version.sql | 40 +++++++++++++++++++ 3 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.reference create mode 100644 tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fb4e9b4ad87..a95b3f99b6f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -513,8 +513,15 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (same_structure) { Coordination::Stat metadata_stat; - current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); + current_zookeeper->get(fs::path(zookeeper_path) / "metadata", &metadata_stat); + + /** We change metadata_snapshot so that `createReplica` method will create `metadata_version` node in ZooKeeper + * with version of table '/metadata' node in Zookeeper. + * + * Otherwise `metadata_version` for not first replica will be initialized with 0 by default. + */ setInMemoryMetadata(metadata_snapshot->withMetadataVersion(metadata_stat.version)); + metadata_snapshot = getInMemoryMetadataPtr(); } } catch (Coordination::Exception & e) @@ -5817,6 +5824,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer Coordination::Requests requests; requests.emplace_back(zkutil::makeSetRequest(fs::path(replica_path) / "columns", entry.columns_str, -1)); requests.emplace_back(zkutil::makeSetRequest(fs::path(replica_path) / "metadata", entry.metadata_str, -1)); + requests.emplace_back(zkutil::makeSetRequest(fs::path(replica_path) / "metadata_version", std::to_string(entry.alter_version), -1)); auto table_id = getStorageID(); auto alter_context = getContext(); @@ -5863,10 +5871,6 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer resetObjectColumnsFromActiveParts(parts_lock); } - /// This transaction may not happen, but it's OK, because on the next retry we will eventually create/update this node - /// TODO Maybe do in in one transaction for Replicated database? 
- zookeeper->createOrUpdate(fs::path(replica_path) / "metadata_version", std::to_string(current_metadata->getMetadataVersion()), zkutil::CreateMode::Persistent); - return true; } diff --git a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.reference b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.reference new file mode 100644 index 00000000000..128e3adcc0a --- /dev/null +++ b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.reference @@ -0,0 +1,14 @@ +Row 1: +────── +name: metadata +version: 1 +-- +Row 1: +────── +name: metadata_version +value: 1 +-- +id UInt64 +value String +insert_time DateTime +insert_time_updated DateTime diff --git a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql new file mode 100644 index 00000000000..3e37f368fd8 --- /dev/null +++ b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql @@ -0,0 +1,40 @@ +-- Tags: zookeeper + +DROP TABLE IF EXISTS test_table_replicated; +CREATE TABLE test_table_replicated +( + id UInt64, + value String +) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_table_replicated', '1_replica') ORDER BY id; + +ALTER TABLE test_table_replicated ADD COLUMN insert_time DateTime; + +SELECT name, version FROM system.zookeeper +WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/' +AND name = 'metadata' FORMAT Vertical; + +DROP TABLE IF EXISTS test_table_replicated_second; +CREATE TABLE test_table_replicated_second +( + id UInt64, + value String, + insert_time DateTime +) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_table_replicated', '2_replica') ORDER BY id; + +DROP TABLE test_table_replicated; + +SELECT '--'; + +SELECT name, value FROM system.zookeeper +WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/replicas/2_replica' +AND name = 'metadata_version' FORMAT Vertical; + +SYSTEM RESTART REPLICA test_table_replicated_second; + +ALTER TABLE test_table_replicated_second ADD COLUMN insert_time_updated DateTime; + +SELECT '--'; + +DESCRIBE test_table_replicated_second; + +DROP TABLE test_table_replicated_second; From f3c2dfeff30e62dd7785bd52aa2469803d6d0552 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 16 Feb 2024 19:39:57 +0300 Subject: [PATCH 094/145] Fixed tests --- .../02989_replicated_merge_tree_invalid_metadata_version.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql index 3e37f368fd8..15633586aa8 100644 --- a/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql +++ b/tests/queries/0_stateless/02989_replicated_merge_tree_invalid_metadata_version.sql @@ -10,7 +10,7 @@ CREATE TABLE test_table_replicated ALTER TABLE test_table_replicated ADD COLUMN insert_time DateTime; SELECT name, version FROM system.zookeeper -WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/' +WHERE path = (SELECT zookeeper_path FROM system.replicas WHERE database = currentDatabase() AND table = 'test_table_replicated') AND name = 'metadata' FORMAT Vertical; DROP TABLE IF EXISTS test_table_replicated_second; @@ -26,7 +26,7 @@ DROP TABLE test_table_replicated; SELECT '--'; SELECT name, 
value FROM system.zookeeper -WHERE path = '/clickhouse/tables/' || currentDatabase() ||'/test_table_replicated/replicas/2_replica' +WHERE path = (SELECT replica_path FROM system.replicas WHERE database = currentDatabase() AND table = 'test_table_replicated_second') AND name = 'metadata_version' FORMAT Vertical; SYSTEM RESTART REPLICA test_table_replicated_second; From ecaaa5066a27a4c94a25d09e1abc5244ee2f1e9e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 20 Feb 2024 10:19:36 +0100 Subject: [PATCH 095/145] Update NuRaft to master --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index a44f99fbfb9..5bb3a0e8257 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit a44f99fbfb9bead06630afb0a4bef2bad48d6e4c +Subproject commit 5bb3a0e8257bacd65b099cb1b7239bd6b9a2c477 From 5ef4a30d6fcc1219a0aa4b309aa98340611158fa Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 20 Feb 2024 10:24:09 +0100 Subject: [PATCH 096/145] Fix --- src/Storages/RabbitMQ/RabbitMQSource.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 25e4b120f42..72196e7dd3c 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -129,12 +129,6 @@ Chunk RabbitMQSource::generateImpl() return {}; } - if (consumer->needChannelUpdate()) - { - LOG_TRACE(log, "Channel {} is in error state, will update", consumer->getChannelID()); - consumer->updateChannel(storage.getConnection()); - } - /// Currently it is one time usage source: to make sure data is flushed /// strictly by timeout or by block size. is_finished = true; From df53826e862b35459a9097bf96cd3bee8700f851 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 09:32:36 +0000 Subject: [PATCH 097/145] Fix style --- tests/queries/0_stateless/02933_compare_with_bool_as_string.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02933_compare_with_bool_as_string.sql diff --git a/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql b/tests/queries/0_stateless/02933_compare_with_bool_as_string.sql old mode 100755 new mode 100644 From cfda64e1a4e36d837934a724baf9485422056487 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 20 Feb 2024 10:33:08 +0100 Subject: [PATCH 098/145] Use Keeper only events --- src/Coordination/FourLetterCommand.cpp | 11 +++++++++-- src/Coordination/KeeperConstants.cpp | 7 ++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 09e99f69fd0..d7fa5abe742 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -38,6 +38,12 @@ String formatZxid(int64_t zxid) } +#if USE_NURAFT +namespace ProfileEvents +{ + extern const std::vector keeper_profile_events; +} +#endif namespace DB { @@ -657,6 +663,7 @@ String ProfileEventsCommand::run() { StringBuffer ret; +#if USE_NURAFT auto append = [&ret] (const String & metric, uint64_t value, const String & docs) -> void { writeText(metric, ret); @@ -667,14 +674,14 @@ String ProfileEventsCommand::run() writeText('\n', ret); }; - for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) + for (auto i : ProfileEvents::keeper_profile_events) { const auto counter = ProfileEvents::global_counters[i].load(std::memory_order_relaxed); - 
std::string metric_name{ProfileEvents::getName(static_cast(i))}; std::string metric_doc{ProfileEvents::getDocumentation(static_cast(i))}; append(metric_name, counter, metric_doc); } +#endif return ret.str(); } diff --git a/src/Coordination/KeeperConstants.cpp b/src/Coordination/KeeperConstants.cpp index f788095334e..aea2391cf13 100644 --- a/src/Coordination/KeeperConstants.cpp +++ b/src/Coordination/KeeperConstants.cpp @@ -284,7 +284,12 @@ M(InterfaceMySQLSendBytes) \ M(InterfaceMySQLReceiveBytes) \ M(InterfacePostgreSQLSendBytes) \ - M(InterfacePostgreSQLReceiveBytes) + M(InterfacePostgreSQLReceiveBytes) \ +\ + M(KeeperLogsEntryReadFromLatestCache) \ + M(KeeperLogsEntryReadFromCommitCache) \ + M(KeeperLogsEntryReadFromFile) \ + M(KeeperLogsPrefetchedEntries) \ namespace ProfileEvents { From c475f3c0bda0636d45c250f9866be0732a059fda Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 10:05:54 +0000 Subject: [PATCH 099/145] Fix test_remote_blobs_naming --- .../test_backward_compatibility.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py index 8c52b05dba2..8bdd82ce9bf 100644 --- a/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py +++ b/tests/integration/test_remote_blobs_naming/test_backward_compatibility.py @@ -288,7 +288,7 @@ def test_replicated_merge_tree(cluster, test_case): WHERE local_path LIKE '%{uuid}%' AND local_path NOT LIKE '%format_version.txt%' - ORDER BY ALL + ORDER BY * """ ).strip() @@ -329,7 +329,7 @@ def test_replicated_merge_tree(cluster, test_case): WHERE table = 'test_replicated_merge_tree' AND active - ORDER BY ALL + ORDER BY * """ ) .strip() @@ -349,7 +349,7 @@ def test_replicated_merge_tree(cluster, test_case): SELECT name FROM system.zookeeper WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}' - ORDER BY ALL + ORDER BY * """ ) .strip() @@ -363,7 +363,7 @@ def test_replicated_merge_tree(cluster, test_case): SELECT name FROM system.zookeeper WHERE path='/clickhouse/zero_copy/zero_copy_s3/{table_shared_uuid}/{part}/{blob}' - ORDER BY ALL + ORDER BY * """ ) .strip() From 3dac4c97438317db2357d0e1db1756a2638da55b Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 20 Feb 2024 11:10:11 +0100 Subject: [PATCH 100/145] Fix --- src/Interpreters/S3QueueLog.cpp | 8 ++++++-- src/Interpreters/S3QueueLog.h | 6 +++++- src/Storages/S3Queue/S3QueueSource.cpp | 9 ++++++++- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/S3QueueLog.cpp b/src/Interpreters/S3QueueLog.cpp index 967becb6e0f..3ed58de0f87 100644 --- a/src/Interpreters/S3QueueLog.cpp +++ b/src/Interpreters/S3QueueLog.cpp @@ -28,7 +28,9 @@ ColumnsDescription S3QueueLogElement::getColumnsDescription() {"hostname", std::make_shared(std::make_shared())}, {"event_date", std::make_shared()}, {"event_time", std::make_shared()}, - {"table_uuid", std::make_shared()}, + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"uuid", std::make_shared()}, {"file_name", std::make_shared()}, {"rows_processed", std::make_shared()}, {"status", status_datatype}, @@ -45,7 +47,9 @@ void S3QueueLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(event_time).toUnderType()); columns[i++]->insert(event_time); - columns[i++]->insert(table_uuid); + 
columns[i++]->insert(database); + columns[i++]->insert(table); + columns[i++]->insert(uuid); columns[i++]->insert(file_name); columns[i++]->insert(rows_processed); columns[i++]->insert(status); diff --git a/src/Interpreters/S3QueueLog.h b/src/Interpreters/S3QueueLog.h index e0362bf9716..b6bc138d42c 100644 --- a/src/Interpreters/S3QueueLog.h +++ b/src/Interpreters/S3QueueLog.h @@ -12,7 +12,11 @@ namespace DB struct S3QueueLogElement { time_t event_time{}; - std::string table_uuid; + + std::string database; + std::string table; + std::string uuid; + std::string file_name; size_t rows_processed = 0; diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index b4f5f957f76..933238d8614 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -352,7 +352,11 @@ void StorageS3QueueSource::applyActionAfterProcessing(const String & path) } } -void StorageS3QueueSource::appendLogElement(const std::string & filename, S3QueueFilesMetadata::FileStatus & file_status_, size_t processed_rows, bool processed) +void StorageS3QueueSource::appendLogElement( + const std::string & filename, + S3QueueFilesMetadata::FileStatus & file_status_, + size_t processed_rows, + bool processed) { if (!s3_queue_log) return; @@ -363,6 +367,9 @@ void StorageS3QueueSource::appendLogElement(const std::string & filename, S3Queu elem = S3QueueLogElement { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), + .database = storage_id.database_name, + .table = storage_id.table_name, + .uuid = toString(storage_id.uuid), .file_name = filename, .rows_processed = processed_rows, .status = processed ? S3QueueLogElement::S3QueueStatus::Processed : S3QueueLogElement::S3QueueStatus::Failed, From 4412f71a1252198d1f18117ebf54f2204a71fc36 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 10:16:46 +0000 Subject: [PATCH 101/145] Docs: Follow-up to #60157 --- docs/en/operations/configuration-files.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 615cff6496a..9f17f4af1e8 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -18,7 +18,11 @@ Two configuration files (usually the main configuration file and another configu - If one of both nodes contains attribute `replace`, it is included in the merged configuration file but only children from the node with attribute `replace` are included. - If one of both nodes contains attribute `remove`, the node is not included in the merged configuration file (if it exists already, it is deleted). 
+Example: + + ```xml + 1 @@ -35,6 +39,7 @@ Two configuration files (usually the main configuration file and another configu and ```xml + 4 @@ -56,7 +61,7 @@ generates merged configuration file: 1 4 - + 5 From 354193795058d80f5d11deb82c0fb837ed46113f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 12:26:51 +0100 Subject: [PATCH 102/145] Add a test for #44318 --- .../02997_projections_formatting.reference | 14 ++++++++++++++ .../0_stateless/02997_projections_formatting.sql | 2 ++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/02997_projections_formatting.reference create mode 100644 tests/queries/0_stateless/02997_projections_formatting.sql diff --git a/tests/queries/0_stateless/02997_projections_formatting.reference b/tests/queries/0_stateless/02997_projections_formatting.reference new file mode 100644 index 00000000000..7b9d6185986 --- /dev/null +++ b/tests/queries/0_stateless/02997_projections_formatting.reference @@ -0,0 +1,14 @@ +CREATE TEMPORARY TABLE t_proj +( + `t` DateTime, + `id` UInt64, + PROJECTION p + ( + SELECT + id, + t + ORDER BY toStartOfDay(t) + ) +) +ENGINE = MergeTree +ORDER BY id diff --git a/tests/queries/0_stateless/02997_projections_formatting.sql b/tests/queries/0_stateless/02997_projections_formatting.sql new file mode 100644 index 00000000000..ef051bca2e9 --- /dev/null +++ b/tests/queries/0_stateless/02997_projections_formatting.sql @@ -0,0 +1,2 @@ +CREATE TEMPORARY TABLE t_proj (t DateTime, id UInt64, PROJECTION p (SELECT id, t ORDER BY toStartOfDay(t))) ENGINE = MergeTree ORDER BY id; +SHOW CREATE TEMPORARY TABLE t_proj FORMAT TSVRaw; From 9036a6974792b6d7a73b190c76e90e0e72ae0296 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 12:28:01 +0100 Subject: [PATCH 103/145] Add a test for #44318 --- .../02997_projections_formatting.reference | 12 ++++++++++++ .../0_stateless/02997_projections_formatting.sql | 3 +++ 2 files changed, 15 insertions(+) diff --git a/tests/queries/0_stateless/02997_projections_formatting.reference b/tests/queries/0_stateless/02997_projections_formatting.reference index 7b9d6185986..6a60da1089a 100644 --- a/tests/queries/0_stateless/02997_projections_formatting.reference +++ b/tests/queries/0_stateless/02997_projections_formatting.reference @@ -12,3 +12,15 @@ CREATE TEMPORARY TABLE t_proj ) ENGINE = MergeTree ORDER BY id +CREATE TEMPORARY TABLE t_proj2 +( + `a` UInt32, + `b` UInt32, + PROJECTION p + ( + SELECT a + ORDER BY b * 2 + ) +) +ENGINE = MergeTree +ORDER BY a diff --git a/tests/queries/0_stateless/02997_projections_formatting.sql b/tests/queries/0_stateless/02997_projections_formatting.sql index ef051bca2e9..b593c2576b1 100644 --- a/tests/queries/0_stateless/02997_projections_formatting.sql +++ b/tests/queries/0_stateless/02997_projections_formatting.sql @@ -1,2 +1,5 @@ CREATE TEMPORARY TABLE t_proj (t DateTime, id UInt64, PROJECTION p (SELECT id, t ORDER BY toStartOfDay(t))) ENGINE = MergeTree ORDER BY id; SHOW CREATE TEMPORARY TABLE t_proj FORMAT TSVRaw; + +CREATE TEMPORARY TABLE t_proj2 (a UInt32, b UInt32, PROJECTION p (SELECT a ORDER BY b * 2)) ENGINE = MergeTree ORDER BY a; +SHOW CREATE TEMPORARY TABLE t_proj2 FORMAT TSVRaw; From ead2e7bc41e71eb2041d8616f9fa5e3ecc2a2f61 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 12:32:25 +0100 Subject: [PATCH 104/145] Revert "Restriction for the access key id for s3." 
--- src/IO/S3/Client.cpp | 16 ---------------- .../02966_s3_access_key_id_restriction.reference | 0 .../02966_s3_access_key_id_restriction.sql | 6 ------ 3 files changed, 22 deletions(-) delete mode 100644 tests/queries/0_stateless/02966_s3_access_key_id_restriction.reference delete mode 100644 tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 7f0ede72740..182e7ad18cd 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -27,7 +27,6 @@ #include -#include namespace ProfileEvents { @@ -48,7 +47,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int TOO_MANY_REDIRECTS; - extern const int BAD_ARGUMENTS; } namespace S3 @@ -106,19 +104,6 @@ void verifyClientConfiguration(const Aws::Client::ClientConfiguration & client_c assert_cast(*client_config.retryStrategy); } -void validateCredentials(const Aws::Auth::AWSCredentials& auth_credentials) -{ - if (auth_credentials.GetAWSAccessKeyId().empty()) - { - return; - } - /// Follow https://docs.aws.amazon.com/IAM/latest/APIReference/API_AccessKey.html - if (!std::all_of(auth_credentials.GetAWSAccessKeyId().begin(), auth_credentials.GetAWSAccessKeyId().end(), isWordCharASCII)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Access key id has an invalid character"); - } -} - void addAdditionalAMZHeadersToCanonicalHeadersList( Aws::AmazonWebServiceRequest & request, const HTTPHeaderEntries & extra_headers @@ -144,7 +129,6 @@ std::unique_ptr Client::create( const ClientSettings & client_settings) { verifyClientConfiguration(client_configuration); - validateCredentials(credentials_provider->GetAWSCredentials()); return std::unique_ptr( new Client(max_redirects_, std::move(sse_kms_config_), credentials_provider, client_configuration, sign_payloads, client_settings)); } diff --git a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.reference b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql b/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql deleted file mode 100644 index c1ca0b4bcd5..00000000000 --- a/tests/queries/0_stateless/02966_s3_access_key_id_restriction.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Tags: no-fasttest - -select * from s3('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from deltaLake('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from hudi('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } -select * from iceberg('http://localhost:11111/test/a.tsv', '\ninjection\n', 'admin'); -- { serverError 36 } From 6249e8da3f89596eb36a4dd9dc8bd259ed05748b Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 19 Feb 2024 13:43:19 +0000 Subject: [PATCH 105/145] Analyzer: fix row level filters with PREWHERE + additional filters --- src/Planner/PlannerJoinTree.cpp | 10 +++++++--- tests/analyzer_tech_debt.txt | 1 - .../02131_row_policies_combination.reference | 9 +++++++++ .../02131_row_policies_combination.sql | 18 ++++++++++++++++++ ...02763_row_policy_storage_merge_alias.sql.j2 | 2 -- 5 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 227ac86d3a5..e6a459d0e8a 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -801,14 +801,18 @@ 
JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres table_expression_query_info.prewhere_info->prewhere_actions = filter_info.actions; table_expression_query_info.prewhere_info->prewhere_column_name = filter_info.column_name; table_expression_query_info.prewhere_info->remove_prewhere_column = filter_info.do_remove_column; + table_expression_query_info.prewhere_info->need_filter = true; } - else + else if (!table_expression_query_info.prewhere_info->row_level_filter) { table_expression_query_info.prewhere_info->row_level_filter = filter_info.actions; table_expression_query_info.prewhere_info->row_level_column_name = filter_info.column_name; + table_expression_query_info.prewhere_info->need_filter = true; + } + else + { + where_filters.emplace_back(filter_info, std::move(description)); } - - table_expression_query_info.prewhere_info->need_filter = true; } else { diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 0566dca8f5c..094865573f9 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -16,7 +16,6 @@ 02354_annoy 02493_inconsistent_hex_and_binary_number 02725_agg_projection_resprect_PK -02763_row_policy_storage_merge_alias # Check after constants refactoring 02901_parallel_replicas_rollup # Flaky. Please don't delete them without fixing them: diff --git a/tests/queries/0_stateless/02131_row_policies_combination.reference b/tests/queries/0_stateless/02131_row_policies_combination.reference index b76028d5077..5015cb14456 100644 --- a/tests/queries/0_stateless/02131_row_policies_combination.reference +++ b/tests/queries/0_stateless/02131_row_policies_combination.reference @@ -12,6 +12,15 @@ R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3) 1 2 3 +R1, R2, R3 + additional_table_filters and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 +R1, R2, R3 + additional_result_filter and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 +R1, R2, R3 + additional_table_filters and WHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1) +2 +3 R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2) 1 2 diff --git a/tests/queries/0_stateless/02131_row_policies_combination.sql b/tests/queries/0_stateless/02131_row_policies_combination.sql index b5be672bb1b..02f2365eed8 100644 --- a/tests/queries/0_stateless/02131_row_policies_combination.sql +++ b/tests/queries/0_stateless/02131_row_policies_combination.sql @@ -23,6 +23,24 @@ CREATE ROW POLICY 02131_filter_3 ON 02131_rptable USING x=3 AS permissive TO ALL SELECT 'R1, R2, R3: (x == 1) OR (x == 2) OR (x == 3)'; SELECT * FROM 02131_rptable; +SELECT 'R1, R2, R3 + additional_table_filters and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +PREWHERE x >= 2 +SETTINGS additional_table_filters = {'02131_rptable': 'x > 1'} +; + +SELECT 'R1, R2, R3 + additional_result_filter and PREWHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +PREWHERE x >= 2 +SETTINGS additional_result_filter = 'x > 1' +; + +SELECT 'R1, R2, R3 + additional_table_filters and WHERE: (x == 1) OR (x == 2) OR (x == 3) AND (x < 3) AND (x > 1)'; +SELECT * FROM 02131_rptable +WHERE x >= 2 +SETTINGS additional_table_filters = {'02131_rptable': 'x > 1'} +; + CREATE ROW POLICY 02131_filter_4 ON 02131_rptable USING x<=2 AS restrictive TO ALL; SELECT 'R1, R2, R3, R4: ((x == 1) OR (x == 2) OR (x == 3)) AND (x <= 2)'; SELECT * FROM 02131_rptable; diff --git 
a/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 b/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 index bdd456951dd..99ac89c4eb4 100644 --- a/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 +++ b/tests/queries/0_stateless/02763_row_policy_storage_merge_alias.sql.j2 @@ -12,8 +12,6 @@ CREATE TABLE 02763_a_merge (x UInt8, y UInt64, z UInt64) ENGINE = Merge(currentD {% for prew in [0 , 1] -%} - - SELECT 'x, y, z FROM 02763_a_merge'; SELECT x, y, z FROM 02763_a_merge ORDER BY x SETTINGS optimize_move_to_prewhere= {{prew}}; SELECT '* FROM 02763_a_merge'; From 0fff496ad388df450af538df2cc3a57c29c86f45 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 11:57:08 +0000 Subject: [PATCH 106/145] Refactoring and cleanup --- src/Functions/array/arrayDotProduct.cpp | 7 +- src/Functions/array/arrayScalarProduct.h | 148 +++++++++++------------ 2 files changed, 76 insertions(+), 79 deletions(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 47e865785d4..180f85499cd 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -49,14 +49,13 @@ public: if (!valid) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Arguments of function {} " - "only support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", - std::string(NameArrayDotProduct::name)); + "Arguments of function {} only support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + NameArrayDotProduct::name); return result_type; } template - static inline NO_SANITIZE_UNDEFINED ResultType apply( + static NO_SANITIZE_UNDEFINED ResultType apply( const T * left, const U * right, size_t size) diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h index 374a2d8a194..2bef11eade5 100644 --- a/src/Functions/array/arrayScalarProduct.h +++ b/src/Functions/array/arrayScalarProduct.h @@ -16,9 +16,9 @@ class Context; namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; } @@ -28,9 +28,81 @@ class FunctionArrayScalarProduct : public IFunction { public: static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + // Basic type check + std::vector nested_types(2, nullptr); + for (size_t i = 0; i < 2; ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments for function {} must be of type Array", getName()); + + const auto & nested_type = array_type->getNestedType(); + if (!isNativeNumber(nested_type) && !isEnum(nested_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} cannot process values of type {}", getName(), nested_type->getName()); + + nested_types[i] = nested_type; + } + + // Perform further type checks in Method + return Method::getReturnType(nested_types[0], nested_types[1]); + } + + ColumnPtr executeImpl(const 
ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + { + switch (result_type->getTypeId()) + { + #define SUPPORTED_TYPE(type) \ + case TypeIndex::type: \ + return executeWithResultType(arguments); \ + break; + + SUPPORTED_TYPE(UInt8) + SUPPORTED_TYPE(UInt16) + SUPPORTED_TYPE(UInt32) + SUPPORTED_TYPE(UInt64) + SUPPORTED_TYPE(Int8) + SUPPORTED_TYPE(Int16) + SUPPORTED_TYPE(Int32) + SUPPORTED_TYPE(Int64) + SUPPORTED_TYPE(Float32) + SUPPORTED_TYPE(Float64) + #undef SUPPORTED_TYPE + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); + } + } private: + template + ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const + { + ColumnPtr res; + if (!((res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)) + || (res = executeNumber(arguments)))) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + + return res; + } template ColumnPtr executeNumber(const ColumnsWithTypeAndName & arguments) const @@ -51,7 +123,6 @@ private: return nullptr; } - template ColumnPtr executeNumberNumber(const ColumnsWithTypeAndName & arguments) const { @@ -103,79 +174,6 @@ private: } } -public: - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - // Basic type check - std::vector nested_types(2, nullptr); - for (size_t i = 0; i < getNumberOfArguments(); ++i) - { - const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName()); - - const auto & nested_type = array_type->getNestedType(); - if (!isNativeNumber(nested_type) && !isEnum(nested_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", - getName(), nested_type->getName()); - nested_types[i] = nested_type; - } - - // Detail type check in Method, then return ReturnType - return Method::getReturnType(nested_types[0], nested_types[1]); - } - - template - ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr res; - if (!((res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); - - return res; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override - { - switch (result_type->getTypeId()) - { - #define 
SUPPORTED_TYPE(type) \ - case TypeIndex::type: \ - return executeWithResultType(arguments); \ - break; - - SUPPORTED_TYPE(UInt8) - SUPPORTED_TYPE(UInt16) - SUPPORTED_TYPE(UInt32) - SUPPORTED_TYPE(UInt64) - SUPPORTED_TYPE(Int8) - SUPPORTED_TYPE(Int16) - SUPPORTED_TYPE(Int32) - SUPPORTED_TYPE(Int64) - SUPPORTED_TYPE(Float32) - SUPPORTED_TYPE(Float64) - #undef SUPPORTED_TYPE - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); - } - } }; } From a8e4627663599a3fa722fdb470e058c18c79c933 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 20 Feb 2024 12:09:54 +0000 Subject: [PATCH 107/145] Fix: can't guarantee query distribution with enabled hedged requests --- src/Client/ConnectionPoolWithFailover.h | 4 ++-- .../test.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index fb60782806f..45916a395ef 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -83,8 +83,8 @@ public: struct NestedPoolStatus { const Base::NestedPoolPtr pool; - size_t error_count; - size_t slowdown_count; + size_t error_count = 0; + size_t slowdown_count = 0; std::chrono::seconds estimated_recovery_time; }; diff --git a/tests/integration/test_parallel_replicas_custom_key_failover/test.py b/tests/integration/test_parallel_replicas_custom_key_failover/test.py index 27e4afb6430..bbb8fd5abb0 100644 --- a/tests/integration/test_parallel_replicas_custom_key_failover/test.py +++ b/tests/integration/test_parallel_replicas_custom_key_failover/test.py @@ -108,9 +108,12 @@ def test_parallel_replicas_custom_key_failover( == "subqueries\t4\n" ) - assert ( - node1.query( - f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + # With enabled hedged requests, we can't guarantee exact query distribution among nodes + # In case of a replica being slow in terms of responsiveness, hedged connection can change initial replicas choice + if use_hedged_requests == 0: + assert ( + node1.query( + f"SELECT h, count() FROM clusterAllReplicas({cluster_name}, system.query_log) WHERE initial_query_id = '{query_id}' AND type ='QueryFinish' GROUP BY hostname() as h ORDER BY h SETTINGS skip_unavailable_shards=1" + ) + == "n1\t3\nn3\t2\n" ) - == "n1\t3\nn3\t2\n" - ) From 37b85360e19cad7e238271259c24214e7362ea22 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 20 Feb 2024 13:15:34 +0100 Subject: [PATCH 108/145] Update CNF.cpp --- src/Analyzer/Passes/CNF.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/CNF.cpp b/src/Analyzer/Passes/CNF.cpp index 5cb79011856..71549f9e71d 100644 --- a/src/Analyzer/Passes/CNF.cpp +++ b/src/Analyzer/Passes/CNF.cpp @@ -163,7 +163,7 @@ private: class PushOrVisitor { public: - PushOrVisitor(size_t max_atoms_) + explicit PushOrVisitor(size_t max_atoms_) : max_atoms(max_atoms_) , and_resolver(createInternalFunctionAndOverloadResolver()) , or_resolver(createInternalFunctionOrOverloadResolver()) From 5267e6b0ce31a2e82aedc30dbe854d227c526b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 13:38:33 +0100 Subject: [PATCH 109/145] Fix arrayReduce with nullable aggregate function name --- src/Functions/array/arrayReduce.cpp | 3 +++ .../02996_nullable_arrayReduce.reference | 
0 .../0_stateless/02996_nullable_arrayReduce.sql | 14 ++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02996_nullable_arrayReduce.reference create mode 100644 tests/queries/0_stateless/02996_nullable_arrayReduce.sql diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 5a6a99ef785..4e192cd7d99 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -48,6 +48,9 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + /// As we parse the function name and deal with arrays we don't want to default NULL handler, which will hide + /// nullability from us (which also means hidden from the aggregate functions) + bool useDefaultImplementationForNulls() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql new file mode 100644 index 00000000000..1019569284f --- /dev/null +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql @@ -0,0 +1,14 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/59600 +SELECT arrayReduce(toNullable('stddevSampOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce(toNullable('median'), [toDecimal32OrNull(toFixedString('1', 1), 2), 8]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toFixedString('--- Int Empty ---', toLowCardinality(17)), arrayReduce(toNullable('avgOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- { echoOn } +SELECT arrayReduce('sum', []::Array(UInt8)) as a, toTypeName(a); +SELECT arrayReduce('sumOrNull', []::Array(UInt8)) as a, toTypeName(a); +SELECT arrayReduce('sum', [NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); + +SELECT arrayReduce('any', toNullable(3)); From 0e3861d65de74d87f1a0dac9edeeef7d3afc6cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 14:20:14 +0100 Subject: [PATCH 110/145] Fix tests --- tests/queries/0_stateless/02732_transform_fuzz.sql | 2 +- .../02996_nullable_arrayReduce.reference | 14 ++++++++++++++ .../0_stateless/02996_nullable_arrayReduce.sql | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02732_transform_fuzz.sql b/tests/queries/0_stateless/02732_transform_fuzz.sql index c2918d4da81..872cf3a6599 100644 --- a/tests/queries/0_stateless/02732_transform_fuzz.sql +++ b/tests/queries/0_stateless/02732_transform_fuzz.sql @@ -1 +1 @@ -SELECT caseWithExpr(arrayReduce(NULL, []), []); -- { serverError BAD_ARGUMENTS } +SELECT caseWithExpr(arrayReduce(NULL, []), []); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git 
a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference index e69de29bb2d..c6f369c1237 100644 --- a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference @@ -0,0 +1,14 @@ +-- { echoOn } +SELECT arrayReduce('sum', []::Array(UInt8)) as a, toTypeName(a); +0 UInt64 +SELECT arrayReduce('sumOrNull', []::Array(UInt8)) as a, toTypeName(a); +\N Nullable(UInt64) +SELECT arrayReduce('sum', [NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +\N Nullable(UInt64) +SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +10 Nullable(UInt64) +SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); +\N Nullable(UInt8) +SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); +10 Nullable(UInt8) +SELECT arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql index 1019569284f..26697d2f10c 100644 --- a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql @@ -11,4 +11,4 @@ SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); -SELECT arrayReduce('any', toNullable(3)); +SELECT arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From 9bf3d6ae2927f5d1b4ffdccde2da93a73518c133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 14:23:44 +0100 Subject: [PATCH 111/145] Add test for 59437 --- .../02996_analyzer_prewhere_projection.reference | 1 + .../0_stateless/02996_analyzer_prewhere_projection.sql | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference create mode 100644 tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql diff --git a/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql new file mode 100644 index 00000000000..9d676001010 --- /dev/null +++ b/tests/queries/0_stateless/02996_analyzer_prewhere_projection.sql @@ -0,0 +1,7 @@ +SET allow_suspicious_low_cardinality_types=1; + +CREATE TABLE t__fuzz_0 (`i` LowCardinality(Int32), `j` Int32, `k` Int32, PROJECTION p (SELECT * ORDER BY j)) ENGINE = MergeTree ORDER BY i SETTINGS index_granularity = 1; +INSERT INTO t__fuzz_0 Select number, number, number FROM numbers(100); + +SELECT * FROM t__fuzz_0 PREWHERE 7 AND (i < 2147483647) AND (j IN (2147483646, -2, 1)) +SETTINGS allow_experimental_analyzer = true; From 11f3b060997e4a0e4ee7f6c797f6c7689556c179 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 20 Feb 2024 14:35:19 +0100 Subject: [PATCH 112/145] Fix --- src/Coordination/Changelog.cpp | 8 ++++++-- 1 file changed, 6 
insertions(+), 2 deletions(-) diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 16f6c36b8a3..70224029da3 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -68,9 +68,13 @@ void moveFileBetweenDisks(DiskPtr disk_from, ChangelogFileDescriptionPtr descrip /// a different thread could be trying to read from the file /// we should make sure the source disk contains the file while read is in progress - description->withLock([&]{ description->disk = disk_to; }); + description->withLock( + [&] + { + description->disk = disk_to; + description->path = path_to; + }); disk_from->removeFile(description->path); - description->path = path_to; } constexpr auto DEFAULT_PREFIX = "changelog"; From bb0b1fa9bc412d058315694730796b907cc7bca1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 20 Feb 2024 14:21:29 +0100 Subject: [PATCH 113/145] Fix --- programs/keeper/CMakeLists.txt | 2 +- src/Coordination/Changelog.cpp | 99 ++------------- src/Coordination/KeeperCommon.cpp | 118 ++++++++++++++++++ src/Coordination/KeeperCommon.h | 28 +++++ src/Coordination/KeeperSnapshotManager.cpp | 80 ++---------- src/Coordination/KeeperStorage.cpp | 3 +- src/Coordination/ZooKeeperDataReader.cpp | 2 +- src/Coordination/pathUtils.cpp | 37 ------ src/Coordination/pathUtils.h | 13 -- src/Coordination/tests/gtest_coordination.cpp | 4 +- 10 files changed, 171 insertions(+), 215 deletions(-) create mode 100644 src/Coordination/KeeperCommon.cpp create mode 100644 src/Coordination/KeeperCommon.h delete mode 100644 src/Coordination/pathUtils.cpp delete mode 100644 src/Coordination/pathUtils.h diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index b8a5d9c9c19..70e0f229fd4 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -41,7 +41,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConstants.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperAsynchronousMetrics.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/pathUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperCommon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp diff --git a/src/Coordination/Changelog.cpp b/src/Coordination/Changelog.cpp index 40ece0e7d2e..cf0ea2193c8 100644 --- a/src/Coordination/Changelog.cpp +++ b/src/Coordination/Changelog.cpp @@ -1,7 +1,7 @@ -#include #include #include #include +#include #include #include #include @@ -35,88 +35,15 @@ namespace ErrorCodes namespace { -constexpr std::string_view tmp_prefix = "tmp_"; - -void moveFileBetweenDisks( +void moveChangelogBetweenDisks( DiskPtr disk_from, ChangelogFileDescriptionPtr description, DiskPtr disk_to, const std::string & path_to, const KeeperContextPtr & keeper_context) { - auto logger = getLogger("Changelog"); - LOG_TRACE(logger, "Moving {} to {} from disk {} to disk {}", description->path, path_to, disk_from->getName(), disk_to->getName()); - /// we use empty file with prefix tmp_ to detect incomplete copies - /// if a copy is complete we don't care from which disk we use the same file - /// so it's okay if a failure happens after removing of tmp file but before we remove - /// the changelog from the source disk - auto from_path = 
fs::path(description->path); - auto tmp_changelog_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string()); - - const auto & coordination_settings = keeper_context->getCoordinationSettings(); - auto max_retries_on_init = coordination_settings->disk_move_retries_during_init.value; - auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms); - auto run_with_retries = [&](const auto & op, std::string_view operation_description) - { - /// we limit the amount of retries during initialization phase because shutdown won't be set - /// before initialization is done, i.e. we would be stuck in infinite loop - size_t retry_num = 0; - do - { - try - { - op(); - return true; - } - catch (...) - { - tryLogCurrentException( - logger, - fmt::format( - "While moving changelog {} to disk {} and running '{}'", - description->path, - disk_to->getName(), - operation_description)); - std::this_thread::sleep_for(retries_sleep); - } - - ++retry_num; - if (keeper_context->getServerState() == KeeperContext::Phase::INIT && retry_num == max_retries_on_init) - { - LOG_ERROR(logger, "Operation '{}' failed too many times", operation_description); - break; - } - - } while (!keeper_context->isShutdownCalled()); - - LOG_ERROR( - getLogger("Changelog"), - "Failed to run '{}' while moving changelog {} to disk {}", - operation_description, - description->path, - disk_to->getName()); - return false; - }; - - std::array, std::string_view>, 4> operations{ - std::pair{ - [&] - { - auto buf = disk_to->writeFile(tmp_changelog_name); - buf->finalize(); - }, - "creating temporary file"}, - std::pair{[&] { disk_from->copyFile(from_path, *disk_to, path_to, {}); }, "copying file"}, - std::pair{[&] { disk_to->removeFileIfExists(tmp_changelog_name); }, "removing temporary file"}, - std::pair{[&] { disk_from->removeFileIfExists(description->path); }, "removing changelog file from source disk"}, - }; - - for (const auto & [op, operation_description] : operations) - { - if (!run_with_retries(op, operation_description)) - return; - } - + moveFileBetweenDisks( + disk_from, description->path, disk_to, path_to, getLogger("Changelog"), keeper_context); description->path = path_to; description->disk = disk_to; } @@ -240,7 +167,7 @@ public: } else { - moveFileBetweenDisks(log_disk, current_file_description, disk, new_path, keeper_context); + moveChangelogBetweenDisks(log_disk, current_file_description, disk, new_path, keeper_context); } } } @@ -715,9 +642,9 @@ Changelog::Changelog( if (file_name == changelogs_detached_dir) continue; - if (file_name.starts_with(tmp_prefix)) + if (file_name.starts_with(tmp_keeper_file_prefix)) { - incomplete_files.emplace(file_name.substr(tmp_prefix.size()), it->path()); + incomplete_files.emplace(file_name.substr(tmp_keeper_file_prefix.size()), it->path()); continue; } @@ -880,7 +807,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin auto disk = getDisk(); if (latest_log_disk != disk && latest_log_disk == description->disk) - moveFileBetweenDisks(latest_log_disk, description, disk, description->path, keeper_context); + moveChangelogBetweenDisks(latest_log_disk, description, disk, description->path, keeper_context); }; /// we can have empty log (with zero entries) and last_log_read_result will be initialized @@ -966,7 +893,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin } if (description->disk != disk) - moveFileBetweenDisks(description->disk, description, disk, 
description->path, keeper_context); + moveChangelogBetweenDisks(description->disk, description, disk, description->path, keeper_context); } @@ -988,7 +915,7 @@ void Changelog::initWriter(ChangelogFileDescriptionPtr description) auto log_disk = description->disk; auto latest_log_disk = getLatestLogDisk(); if (log_disk != latest_log_disk) - moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); + moveChangelogBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); current_writer->setFile(std::move(description), WriteMode::Append); } @@ -1051,11 +978,11 @@ void Changelog::removeExistingLogs(ChangelogIter begin, ChangelogIter end) catch (const DB::Exception & e) { if (e.code() == DB::ErrorCodes::NOT_IMPLEMENTED) - moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); + moveChangelogBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); } } else - moveFileBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); + moveChangelogBetweenDisks(changelog_disk, changelog_description, disk, new_path, keeper_context); itr = existing_changelogs.erase(itr); } @@ -1266,7 +1193,7 @@ void Changelog::writeAt(uint64_t index, const LogEntryPtr & log_entry) auto log_disk = description->disk; auto latest_log_disk = getLatestLogDisk(); if (log_disk != latest_log_disk) - moveFileBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); + moveChangelogBetweenDisks(log_disk, description, latest_log_disk, description->path, keeper_context); current_writer->setFile(std::move(description), WriteMode::Append); diff --git a/src/Coordination/KeeperCommon.cpp b/src/Coordination/KeeperCommon.cpp new file mode 100644 index 00000000000..820abf1bbbe --- /dev/null +++ b/src/Coordination/KeeperCommon.cpp @@ -0,0 +1,118 @@ +#include + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +static size_t findLastSlash(StringRef path) +{ + if (path.size == 0) + return std::string::npos; + + for (size_t i = path.size - 1; i > 0; --i) + { + if (path.data[i] == '/') + return i; + } + + if (path.data[0] == '/') + return 0; + + return std::string::npos; +} + +StringRef parentNodePath(StringRef path) +{ + auto rslash_pos = findLastSlash(path); + if (rslash_pos > 0) + return StringRef{path.data, rslash_pos}; + return "/"; +} + +StringRef getBaseNodeName(StringRef path) +{ + size_t basename_start = findLastSlash(path); + return StringRef{path.data + basename_start + 1, path.size - basename_start - 1}; +} + +void moveFileBetweenDisks( + DiskPtr disk_from, + const std::string & path_from, + DiskPtr disk_to, + const std::string & path_to, + LoggerPtr logger, + const KeeperContextPtr & keeper_context) +{ + LOG_TRACE(logger, "Moving {} to {} from disk {} to disk {}", path_from, path_to, disk_from->getName(), disk_to->getName()); + /// we use empty file with prefix tmp_ to detect incomplete copies + /// if a copy is complete we don't care from which disk we use the same file + /// so it's okay if a failure happens after removing of tmp file but before we remove + /// the file from the source disk + auto from_path = fs::path(path_from); + auto tmp_file_name = from_path.parent_path() / (std::string{tmp_keeper_file_prefix} + from_path.filename().string()); + + const auto & coordination_settings = keeper_context->getCoordinationSettings(); + auto max_retries_on_init = 
coordination_settings->disk_move_retries_during_init.value; + auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms); + auto run_with_retries = [&](const auto & op, std::string_view operation_description) + { + size_t retry_num = 0; + do + { + try + { + op(); + return true; + } + catch (...) + { + tryLogCurrentException( + logger, + fmt::format( + "While moving file {} to disk {} and running '{}'", path_from, disk_to->getName(), operation_description)); + std::this_thread::sleep_for(retries_sleep); + } + + ++retry_num; + if (keeper_context->getServerState() == KeeperContext::Phase::INIT && retry_num == max_retries_on_init) + { + LOG_ERROR(logger, "Operation '{}' failed too many times", operation_description); + break; + } + } while (!keeper_context->isShutdownCalled()); + + LOG_ERROR( + logger, + "Failed to run '{}' while moving file {} to disk {}", + operation_description, + path_from, + disk_to->getName()); + return false; + }; + + if (!run_with_retries( + [&] + { + auto buf = disk_to->writeFile(tmp_file_name); + buf->finalize(); + }, + "creating temporary file")) + return; + + if (!run_with_retries([&] { disk_from->copyFile(from_path, *disk_to, path_to, {}); }, "copying file")) + return; + + if (!run_with_retries([&] { disk_to->removeFileIfExists(tmp_file_name); }, "removing temporary file")) + return; + + if (!run_with_retries([&] { disk_from->removeFileIfExists(path_from); }, "removing file from source disk")) + return; +} +} diff --git a/src/Coordination/KeeperCommon.h b/src/Coordination/KeeperCommon.h new file mode 100644 index 00000000000..179d80b295f --- /dev/null +++ b/src/Coordination/KeeperCommon.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include "Common/Logger.h" + +namespace DB +{ + +class IDisk; +using DiskPtr = std::shared_ptr; +class KeeperContext; +using KeeperContextPtr = std::shared_ptr; + +StringRef parentNodePath(StringRef path); + +StringRef getBaseNodeName(StringRef path); + +inline static constexpr std::string_view tmp_keeper_file_prefix = "tmp_"; + +void moveFileBetweenDisks( + DiskPtr disk_from, + const std::string & path_from, + DiskPtr disk_to, + const std::string & path_to, + LoggerPtr logger, + const KeeperContextPtr & keeper_context); + +} diff --git a/src/Coordination/KeeperSnapshotManager.cpp b/src/Coordination/KeeperSnapshotManager.cpp index 61bcdf023cf..4ae39487e0b 100644 --- a/src/Coordination/KeeperSnapshotManager.cpp +++ b/src/Coordination/KeeperSnapshotManager.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -33,79 +33,15 @@ namespace ErrorCodes namespace { - constexpr std::string_view tmp_prefix = "tmp_"; - - void moveFileBetweenDisks( + void moveSnapshotFileBetweenDisks( DiskPtr disk_from, const std::string & path_from, DiskPtr disk_to, const std::string & path_to, const KeeperContextPtr & keeper_context) { - auto logger = getLogger("KeeperSnapshotManager"); - LOG_TRACE(logger, "Moving {} to {} from disk {} to disk {}", path_from, path_to, disk_from->getName(), disk_to->getName()); - /// we use empty file with prefix tmp_ to detect incomplete copies - /// if a copy is complete we don't care from which disk we use the same file - /// so it's okay if a failure happens after removing of tmp file but before we remove - /// the snapshot from the source disk - auto from_path = fs::path(path_from); - auto tmp_snapshot_name = from_path.parent_path() / (std::string{tmp_prefix} + from_path.filename().string()); - - const auto & coordination_settings = 
keeper_context->getCoordinationSettings(); - auto max_retries_on_init = coordination_settings->disk_move_retries_during_init.value; - auto retries_sleep = std::chrono::milliseconds(coordination_settings->disk_move_retries_wait_ms); - auto run_with_retries = [&](const auto & op, std::string_view operation_description) - { - size_t retry_num = 0; - do - { - try - { - op(); - return true; - } - catch (...) - { - tryLogCurrentException( - logger, - fmt::format( - "While moving snapshot {} to disk {} and running '{}'", path_from, disk_to->getName(), operation_description)); - std::this_thread::sleep_for(retries_sleep); - } - - ++retry_num; - if (keeper_context->getServerState() == KeeperContext::Phase::INIT && retry_num == max_retries_on_init) - { - LOG_ERROR(logger, "Operation '{}' failed too many times", operation_description); - break; - } - } while (!keeper_context->isShutdownCalled()); - - LOG_ERROR( - logger, - "Failed to run '{}' while moving snapshot {} to disk {}", - operation_description, - path_from, - disk_to->getName()); - return false; - }; - - std::array, std::string_view>, 4> operations{ - std::pair{ - [&] - { - auto buf = disk_to->writeFile(tmp_snapshot_name); - buf->finalize(); - }, - "creating temporary file"}, - std::pair{[&] { disk_from->copyFile(from_path, *disk_to, path_to, {}); }, "copying file"}, - std::pair{[&] { disk_to->removeFileIfExists(tmp_snapshot_name); }, "removing temporary file"}, - std::pair{[&] { disk_from->removeFileIfExists(path_from); }, "removing snapshot file from source disk"}, - }; - - for (const auto & [op, operation_description] : operations) - if (!run_with_retries(op, operation_description)) - return; + moveFileBetweenDisks( + std::move(disk_from), path_from, std::move(disk_to), path_to, getLogger("KeeperSnapshotManager"), keeper_context); } uint64_t getSnapshotPathUpToLogIdx(const String & snapshot_path) @@ -639,9 +575,9 @@ KeeperSnapshotManager::KeeperSnapshotManager( std::vector snapshot_files; for (auto it = disk->iterateDirectory(""); it->isValid(); it->next()) { - if (it->name().starts_with(tmp_prefix)) + if (it->name().starts_with(tmp_keeper_file_prefix)) { - incomplete_files.emplace(it->name().substr(tmp_prefix.size()), it->path()); + incomplete_files.emplace(it->name().substr(tmp_keeper_file_prefix.size()), it->path()); continue; } @@ -831,7 +767,7 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() { if (file_info.disk != latest_snapshot_disk) { - moveFileBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context); + moveSnapshotFileBetweenDisks(file_info.disk, file_info.path, latest_snapshot_disk, file_info.path, keeper_context); file_info.disk = latest_snapshot_disk; } } @@ -839,7 +775,7 @@ void KeeperSnapshotManager::moveSnapshotsIfNeeded() { if (file_info.disk != disk) { - moveFileBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context); + moveSnapshotFileBetweenDisks(file_info.disk, file_info.path, disk, file_info.path, keeper_context); file_info.disk = disk; } } diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index eaa0c3c9e68..9321fb33163 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include @@ -26,7 +26,6 @@ #include #include -#include namespace ProfileEvents { diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index c7b1abf1d83..c205db942b9 100644 --- 
a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Coordination/pathUtils.cpp b/src/Coordination/pathUtils.cpp deleted file mode 100644 index 25f8e25cf06..00000000000 --- a/src/Coordination/pathUtils.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include - -namespace DB -{ - -static size_t findLastSlash(StringRef path) -{ - if (path.size == 0) - return std::string::npos; - - for (size_t i = path.size - 1; i > 0; --i) - { - if (path.data[i] == '/') - return i; - } - - if (path.data[0] == '/') - return 0; - - return std::string::npos; -} - -StringRef parentNodePath(StringRef path) -{ - auto rslash_pos = findLastSlash(path); - if (rslash_pos > 0) - return StringRef{path.data, rslash_pos}; - return "/"; -} - -StringRef getBaseNodeName(StringRef path) -{ - size_t basename_start = findLastSlash(path); - return StringRef{path.data + basename_start + 1, path.size - basename_start - 1}; -} - -} diff --git a/src/Coordination/pathUtils.h b/src/Coordination/pathUtils.h deleted file mode 100644 index b2b79b14110..00000000000 --- a/src/Coordination/pathUtils.h +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -StringRef parentNodePath(StringRef path); - -StringRef getBaseNodeName(StringRef path); - -} diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index c6d98e6f3dd..763804ba389 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,8 +1,6 @@ #include #include -#include "Common/ZooKeeper/IKeeper.h" -#include "Core/Defines.h" #include "config.h" #if USE_NURAFT @@ -22,7 +20,7 @@ #include #include #include -#include +#include #include #include #include From 32130d7f78c28ea688af728744ca1bb2e989b576 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 15:55:22 +0100 Subject: [PATCH 114/145] Fix low cardinality too --- src/Functions/array/arrayReduce.cpp | 26 +++++++++++++------ .../02996_nullable_arrayReduce.reference | 3 ++- .../02996_nullable_arrayReduce.sql | 5 +++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 4e192cd7d99..d47d1ae98cc 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -1,14 +1,15 @@ -#include -#include -#include -#include -#include -#include -#include #include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -51,6 +52,8 @@ public: /// As we parse the function name and deal with arrays we don't want to default NULL handler, which will hide /// nullability from us (which also means hidden from the aggregate functions) bool useDefaultImplementationForNulls() const override { return false; } + /// Same for low cardinality. 
We want to return exactly what the aggregate function returns, no meddling + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; @@ -118,7 +121,8 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume const IAggregateFunction & agg_func = *aggregate_function; std::unique_ptr arena = std::make_unique(); - /// Aggregate functions do not support constant columns. Therefore, we materialize them. + /// Aggregate functions do not support constant or lowcardinality columns. Therefore, we materialize them and + /// keep a reference so they are alive until we finish using their nested columns (array data/offset) std::vector materialized_columns; const size_t num_arguments_columns = arguments.size() - 1; @@ -129,6 +133,12 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume for (size_t i = 0; i < num_arguments_columns; ++i) { const IColumn * col = arguments[i + 1].column.get(); + auto col_no_lowcardinality = recursiveRemoveLowCardinality(arguments[i + 1].column); + if (col_no_lowcardinality != arguments[i + 1].column) + { + materialized_columns.emplace_back(col_no_lowcardinality); + col = col_no_lowcardinality.get(); + } const ColumnArray::Offsets * offsets_i = nullptr; if (const ColumnArray * arr = checkAndGetColumn(col)) diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference index c6f369c1237..96afb8546ef 100644 --- a/tests/queries/0_stateless/02996_nullable_arrayReduce.reference +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.reference @@ -11,4 +11,5 @@ SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a \N Nullable(UInt8) SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); 10 Nullable(UInt8) -SELECT arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce('median', [toLowCardinality(toNullable(8))]) as t, toTypeName(t); +8 Nullable(Float64) diff --git a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql index 26697d2f10c..8f69296dbe5 100644 --- a/tests/queries/0_stateless/02996_nullable_arrayReduce.sql +++ b/tests/queries/0_stateless/02996_nullable_arrayReduce.sql @@ -2,6 +2,8 @@ SELECT arrayReduce(toNullable('stddevSampOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayReduce(toNullable('median'), [toDecimal32OrNull(toFixedString('1', 1), 2), 8]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT toFixedString('--- Int Empty ---', toLowCardinality(17)), arrayReduce(toNullable('avgOrNull'), [1]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce(toLowCardinality('median'), [toLowCardinality(toNullable(8))]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -- { echoOn } SELECT arrayReduce('sum', []::Array(UInt8)) as a, toTypeName(a); @@ -11,4 +13,5 @@ SELECT arrayReduce('sum', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a SELECT arrayReduce('any_respect_nulls', [NULL, 10]::Array(Nullable(UInt8))) as a, toTypeName(a); SELECT arrayReduce('any_respect_nulls', [10, NULL]::Array(Nullable(UInt8))) as a, toTypeName(a); -SELECT 
arrayReduce('any', toNullable(3)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayReduce('median', [toLowCardinality(toNullable(8))]) as t, toTypeName(t); +-- { echoOff } From f97d7bd0ab21b1c917943c0ec89c196f671162da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 16:24:30 +0100 Subject: [PATCH 115/145] Not sure why it's running with aarch64 --- tests/queries/0_stateless/02995_new_settings_history.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh index 8932e00086b..dca9882eaf7 100755 --- a/tests/queries/0_stateless/02995_new_settings_history.sh +++ b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -1,14 +1,13 @@ #!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# Some settings can be different for builds with sanitizers or aarch64 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Tags: no-tsan, no-asan, no-msan, no-ubsan, no-cpu-aarch64 -# Some settings can be different for builds with sanitizers - # Note that this is a broad check. A per version check is done in the upgrade test -# Baseline generated with 23.12 +# Baseline generated with 23.12.1 # clickhouse local --query "select name, default from system.settings order by name format TSV" > 02995_baseline_23_12_1.tsv $CLICKHOUSE_LOCAL --query " WITH old_settings AS From 738d1b1ddd71ead034949199c9756e4040dc71d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Feb 2024 18:24:35 +0100 Subject: [PATCH 116/145] Ignore dynamic defaults --- tests/queries/0_stateless/02995_new_settings_history.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02995_new_settings_history.sh b/tests/queries/0_stateless/02995_new_settings_history.sh index dca9882eaf7..8de98c55b6a 100755 --- a/tests/queries/0_stateless/02995_new_settings_history.sh +++ b/tests/queries/0_stateless/02995_new_settings_history.sh @@ -16,7 +16,8 @@ $CLICKHOUSE_LOCAL --query " ), new_settings AS ( - select name, default from system.settings order by name + -- Ignore settings that depend on the machine config (max_threads and similar) + SELECT name, default FROM system.settings WHERE default NOT LIKE '%auto(%' ) SELECT * FROM ( From db974098d073496d640bf08f0e43f3aa572ad3ff Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 19:54:12 +0000 Subject: [PATCH 117/145] More refactoring and cleanup --- .../functions/tuple-functions.md | 2 +- src/Functions/array/arrayDotProduct.cpp | 189 ++++++++++++++++-- src/Functions/array/arrayScalarProduct.h | 180 ----------------- 3 files changed, 175 insertions(+), 196 deletions(-) delete mode 100644 src/Functions/array/arrayScalarProduct.h diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 5930239dc56..b089de67e98 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -542,7 +542,7 @@ Alias: `scalarProduct`. - Scalar product. -Type: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). 
+Type: [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). **Example** diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 180f85499cd..8a63d11de7d 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -1,11 +1,13 @@ +#include +#include +#include #include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include namespace DB @@ -13,9 +15,163 @@ namespace DB namespace ErrorCodes { + extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; } +template +class FunctionArrayScalarProduct : public IFunction +{ +public: + static constexpr auto name = Name::name; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + std::array nested_types; + for (size_t i = 0; i < 2; ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments for function {} must be of type Array", getName()); + + const auto & nested_type = array_type->getNestedType(); + if (!isNativeNumber(nested_type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} cannot process values of type {}", getName(), nested_type->getName()); + + nested_types[i] = nested_type; + } + + return Impl::getReturnType(nested_types[0], nested_types[1]); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override + { + switch (result_type->getTypeId()) + { + #define SUPPORTED_TYPE(type) \ + case TypeIndex::type: \ + return executeWithResultType(arguments); \ + break; + + SUPPORTED_TYPE(UInt8) + SUPPORTED_TYPE(UInt16) + SUPPORTED_TYPE(UInt32) + SUPPORTED_TYPE(UInt64) + SUPPORTED_TYPE(Int8) + SUPPORTED_TYPE(Int16) + SUPPORTED_TYPE(Int32) + SUPPORTED_TYPE(Int64) + SUPPORTED_TYPE(Float32) + SUPPORTED_TYPE(Float64) + #undef SUPPORTED_TYPE + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); + } + } + +private: + template + ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const + { + ColumnPtr res; + if (!((res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)) + || (res = executeWithResultTypeAndLeft(arguments)))) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + + return res; + } + + template + ColumnPtr executeWithResultTypeAndLeft(const ColumnsWithTypeAndName & arguments) const + { + ColumnPtr res; + if ( (res = 
executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments)) + || (res = executeWithResultTypeAndLeftAndRight(arguments))) + return res; + + return nullptr; + } + + template + ColumnPtr executeWithResultTypeAndLeftAndRight(const ColumnsWithTypeAndName & arguments) const + { + ColumnPtr col_left = arguments[0].column->convertToFullColumnIfConst(); + ColumnPtr col_right = arguments[1].column->convertToFullColumnIfConst(); + if (!col_left || !col_right) + return nullptr; + + const ColumnArray * col_arr_left = checkAndGetColumn(col_left.get()); + const ColumnArray * cokl_arr_right = checkAndGetColumn(col_right.get()); + if (!col_arr_left || !cokl_arr_right) + return nullptr; + + const ColumnVector * col_arr_nested_left = checkAndGetColumn>(col_arr_left->getData()); + const ColumnVector * col_arr_nested_right = checkAndGetColumn>(cokl_arr_right->getData()); + if (!col_arr_nested_left || !col_arr_nested_right) + return nullptr; + + if (!col_arr_left->hasEqualOffsets(*cokl_arr_right)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); + + auto col_res = ColumnVector::create(); + + vector( + col_arr_nested_left->getData(), + col_arr_nested_right->getData(), + col_arr_left->getOffsets(), + col_res->getData()); + + return col_res; + } + + template + static NO_INLINE void vector( + const PaddedPODArray & left, + const PaddedPODArray & right, + const ColumnArray::Offsets & offsets, + PaddedPODArray & result) + { + size_t size = offsets.size(); + result.resize(size); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + size_t array_size = offsets[i] - current_offset; + result[i] = Impl::template apply(&left[current_offset], &right[current_offset], array_size); + current_offset = offsets[i]; + } + } + +}; + struct NameArrayDotProduct { static constexpr auto name = "arrayDotProduct"; @@ -29,16 +185,18 @@ public: using Types = TypeList; + Types types; DataTypePtr result_type; - bool valid = castTypeToEither(Types{}, left.get(), [&](const auto & left_) + bool valid = castTypeToEither(types, left.get(), [&](const auto & left_) { - return castTypeToEither(Types{}, right.get(), [&](const auto & right_) + return castTypeToEither(types, right.get(), [&](const auto & right_) { - using LeftDataType = typename std::decay_t::FieldType; - using RightDataType = typename std::decay_t::FieldType; - using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; - if (std::is_same_v && std::is_same_v) + using LeftType = typename std::decay_t::FieldType; + using RightType = typename std::decay_t::FieldType; + using ResultType = typename NumberTraits::ResultOfAdditionMultiplication::Type; + + if constexpr (std::is_same_v && std::is_same_v) result_type = std::make_shared(); else result_type = std::make_shared>(); @@ -54,10 +212,10 @@ public: return result_type; } - template + template static NO_SANITIZE_UNDEFINED ResultType apply( - const T * left, - const U * right, + const LeftType * left, + const RightType * right, size_t size) { ResultType 
result = 0; @@ -76,4 +234,5 @@ REGISTER_FUNCTION(ArrayDotProduct) // These functions are used by TupleOrArrayFunction in Function/vectorFunctions.cpp FunctionPtr createFunctionArrayDotProduct(ContextPtr context_) { return FunctionArrayDotProduct::create(context_); } + } diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h deleted file mode 100644 index 2bef11eade5..00000000000 --- a/src/Functions/array/arrayScalarProduct.h +++ /dev/null @@ -1,180 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -class Context; - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int LOGICAL_ERROR; -} - - -template -class FunctionArrayScalarProduct : public IFunction -{ -public: - static constexpr auto name = Name::name; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - String getName() const override { return name; } - size_t getNumberOfArguments() const override { return 2; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - // Basic type check - std::vector nested_types(2, nullptr); - for (size_t i = 0; i < 2; ++i) - { - const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Arguments for function {} must be of type Array", getName()); - - const auto & nested_type = array_type->getNestedType(); - if (!isNativeNumber(nested_type) && !isEnum(nested_type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} cannot process values of type {}", getName(), nested_type->getName()); - - nested_types[i] = nested_type; - } - - // Perform further type checks in Method - return Method::getReturnType(nested_types[0], nested_types[1]); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override - { - switch (result_type->getTypeId()) - { - #define SUPPORTED_TYPE(type) \ - case TypeIndex::type: \ - return executeWithResultType(arguments); \ - break; - - SUPPORTED_TYPE(UInt8) - SUPPORTED_TYPE(UInt16) - SUPPORTED_TYPE(UInt32) - SUPPORTED_TYPE(UInt64) - SUPPORTED_TYPE(Int8) - SUPPORTED_TYPE(Int16) - SUPPORTED_TYPE(Int32) - SUPPORTED_TYPE(Int64) - SUPPORTED_TYPE(Float32) - SUPPORTED_TYPE(Float64) - #undef SUPPORTED_TYPE - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); - } - } - -private: - template - ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr res; - if (!((res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)) - || (res = executeNumber(arguments)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); - - return res; - } - - template - ColumnPtr executeNumber(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr res; - if ( (res = 
executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments)) - || (res = executeNumberNumber(arguments))) - return res; - - return nullptr; - } - - template - ColumnPtr executeNumberNumber(const ColumnsWithTypeAndName & arguments) const - { - ColumnPtr col1 = arguments[0].column->convertToFullColumnIfConst(); - ColumnPtr col2 = arguments[1].column->convertToFullColumnIfConst(); - if (!col1 || !col2) - return nullptr; - - const ColumnArray * col_array1 = checkAndGetColumn(col1.get()); - const ColumnArray * col_array2 = checkAndGetColumn(col2.get()); - if (!col_array1 || !col_array2) - return nullptr; - - if (!col_array1->hasEqualOffsets(*col_array2)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); - - const ColumnVector * col_nested1 = checkAndGetColumn>(col_array1->getData()); - const ColumnVector * col_nested2 = checkAndGetColumn>(col_array2->getData()); - if (!col_nested1 || !col_nested2) - return nullptr; - - auto col_res = ColumnVector::create(); - - vector( - col_nested1->getData(), - col_nested2->getData(), - col_array1->getOffsets(), - col_res->getData()); - - return col_res; - } - - template - static NO_INLINE void vector( - const PaddedPODArray & data1, - const PaddedPODArray & data2, - const ColumnArray::Offsets & offsets, - PaddedPODArray & result) - { - size_t size = offsets.size(); - result.resize(size); - - ColumnArray::Offset current_offset = 0; - for (size_t i = 0; i < size; ++i) - { - size_t array_size = offsets[i] - current_offset; - result[i] = Method::template apply(&data1[current_offset], &data2[current_offset], array_size); - current_offset = offsets[i]; - } - } - -}; - -} - From 877dc695f29b0df163a847ad323eaf2d20552af5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 20:09:54 +0000 Subject: [PATCH 118/145] Make the tests more systematic --- .../0_stateless/02708_dotProduct.reference | 34 ++++++++++++ .../queries/0_stateless/02708_dotProduct.sql | 47 ++++++++++++++++ .../0_stateless/02708_dot_product.reference | 14 ----- .../queries/0_stateless/02708_dot_product.sql | 55 ------------------- 4 files changed, 81 insertions(+), 69 deletions(-) create mode 100644 tests/queries/0_stateless/02708_dotProduct.reference create mode 100644 tests/queries/0_stateless/02708_dotProduct.sql delete mode 100644 tests/queries/0_stateless/02708_dot_product.reference delete mode 100644 tests/queries/0_stateless/02708_dot_product.sql diff --git a/tests/queries/0_stateless/02708_dotProduct.reference b/tests/queries/0_stateless/02708_dotProduct.reference new file mode 100644 index 00000000000..5cc9a9f0502 --- /dev/null +++ b/tests/queries/0_stateless/02708_dotProduct.reference @@ -0,0 +1,34 @@ +-- Negative tests +-- Tests + -- Array +[1,2,3] [4,5,6] 32 UInt16 +[1,2,3] [4,5,6] 32 UInt32 +[1,2,3] [4,5,6] 32 UInt64 +[1,2,3] [4,5,6] 32 UInt64 +[-1,-2,-3] [4,5,6] -32 Int16 +[-1,-2,-3] [4,5,6] -32 Int32 +[-1,-2,-3] [4,5,6] -32 Int64 +[-1,-2,-3] [4,5,6] -32 Int64 +[1,2,3] [4,5,6] 32 Float32 +[1,2,3] [4,5,6] 32 Float64 + -- Tuple +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(1,2,3) (4,5,6) 32 UInt64 +(-1,-2,-3) (4,5,6) -32 Int64 +(-1,-2,-3) (4,5,6) -32 Int64 
+(-1,-2,-3) (4,5,6) -32 Int64 +(-1,-2,-3) (4,5,6) -32 Int64 +(1,2,3) (4,5,6) 32 Float64 +(1,2,3) (4,5,6) 32 Float64 +-- Non-const argument +[1,2,3] [4,5,6] 32 UInt16 + -- Array with mixed element arguments types (result type is the supertype) +[1,2,3] [4,5,6] 32 Float32 + -- Tuple with mixed element arguments types +(1,2,3) (4,5,6) 32 Float64 +-- Aliases +32 +32 +32 diff --git a/tests/queries/0_stateless/02708_dotProduct.sql b/tests/queries/0_stateless/02708_dotProduct.sql new file mode 100644 index 00000000000..6ad615664e8 --- /dev/null +++ b/tests/queries/0_stateless/02708_dotProduct.sql @@ -0,0 +1,47 @@ +SELECT '-- Negative tests'; + +SELECT arrayDotProduct([1, 2]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT arrayDotProduct([1, 2], 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct('abc', [1, 2]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct([1, 2], ['abc', 'def']); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayDotProduct([1, 2], [3, 4, 5]); -- { serverError BAD_ARGUMENTS } +SELECT dotProduct([1, 2], (3, 4, 5)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT '-- Tests'; +SELECT ' -- Array'; +SELECT [1, 2, 3]::Array(UInt8) AS x, [4, 5, 6]::Array(UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt16) AS x, [4, 5, 6]::Array(UInt16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt32) AS x, [4, 5, 6]::Array(UInt32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(UInt64) AS x, [4, 5, 6]::Array(UInt64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int8) AS x, [4, 5, 6]::Array(Int8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int16) AS x, [4, 5, 6]::Array(Int16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int32) AS x, [4, 5, 6]::Array(Int32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [-1, -2, -3]::Array(Int64) AS x, [4, 5, 6]::Array(Int64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(Float32) AS x, [4, 5, 6]::Array(Float32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT [1, 2, 3]::Array(Float64) AS x, [4, 5, 6]::Array(Float64) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Tuple'; +SELECT (1::UInt8, 2::UInt8, 3::UInt8) AS x, (4::UInt8, 5::UInt8, 6::UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt16, 2::UInt16, 3::UInt16) AS x, (4::UInt16, 5::UInt16, 6::UInt16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt32, 2::UInt32, 3::UInt32) AS x, (4::UInt32, 5::UInt32, 6::UInt32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::UInt64, 2::UInt64, 3::UInt64) AS x, (4::UInt64, 5::UInt64, 6::UInt64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int8, -2::Int8, -3::Int8) AS x, (4::Int8, 5::Int8, 6::Int8) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int16, -2::Int16, -3::Int16) AS x, (4::Int16, 5::Int16, 6::Int16) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int32, -2::Int32, -3::Int32) AS x, (4::Int32, 5::Int32, 6::Int32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (-1::Int64, -2::Int64, -3::Int64) AS x, (4::Int64, 5::Int64, 6::Int64) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::Float32, 2::Float32, 3::Float32) AS x, (4::Float32, 5::Float32, 6::Float32) AS y, dotProduct(x, y) AS res, toTypeName(res); +SELECT (1::Float64, 2::Float64, 3::Float64) AS x, 
(4::Float64, 5::Float64, 6::Float64) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT '-- Non-const argument'; +SELECT materialize([1::UInt8, 2::UInt8, 3::UInt8]) AS x, [4::UInt8, 5::UInt8, 6::UInt8] AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Array with mixed element arguments types (result type is the supertype)'; +SELECT [1::UInt16, 2::UInt8, 3::Float32] AS x, [4::Int16, 5::Float32, 6::UInt8] AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT ' -- Tuple with mixed element arguments types'; +SELECT (1::UInt16, 2::UInt8, 3::Float32) AS x, (4::Int16, 5::Float32, 6::UInt8) AS y, dotProduct(x, y) AS res, toTypeName(res); + +SELECT '-- Aliases'; +SELECT scalarProduct([1, 2, 3], [4, 5, 6]); +SELECT scalarProduct((1, 2, 3), (4, 5, 6)); +SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]); -- actually no alias but the internal function for arrays diff --git a/tests/queries/0_stateless/02708_dot_product.reference b/tests/queries/0_stateless/02708_dot_product.reference deleted file mode 100644 index 45e53871aa2..00000000000 --- a/tests/queries/0_stateless/02708_dot_product.reference +++ /dev/null @@ -1,14 +0,0 @@ -3881.304 -3881.304 -3881.304 -376.5 -230 -0 -0 -Float64 -Float32 -Float64 -Float64 -UInt16 -UInt64 -Int64 diff --git a/tests/queries/0_stateless/02708_dot_product.sql b/tests/queries/0_stateless/02708_dot_product.sql deleted file mode 100644 index e94cb577bf4..00000000000 --- a/tests/queries/0_stateless/02708_dot_product.sql +++ /dev/null @@ -1,55 +0,0 @@ -SELECT dotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT scalarProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT arrayDotProduct([12, 2.22, 302], [1.32, 231.2, 11.1]); - -SELECT dotProduct([1.3, 2, 3, 4, 5], [222, 12, 5.3, 2, 8]); - -SELECT dotProduct([1, 1, 1, 1, 1], [222, 12, 0, -12, 8]); - -SELECT round(dotProduct([12345678901234567], [1]) - dotProduct(tuple(12345678901234567), tuple(1)), 2); - -SELECT round(dotProduct([-1, 2, 3.002], [2, 3.4, 4]) - dotProduct((-1, 2, 3.002), (2, 3.4, 4)), 2); - -DROP TABLE IF EXISTS product_fp64_fp64; -CREATE TABLE product_fp64_fp64 (x Array(Float64), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp64_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp64_fp64; -DROP TABLE product_fp64_fp64; - -DROP TABLE IF EXISTS product_fp32_fp32; -CREATE TABLE product_fp32_fp32 (x Array(Float32), y Array(Float32)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp32_fp32 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp32; -DROP TABLE product_fp32_fp32; - -DROP TABLE IF EXISTS product_fp32_fp64; -CREATE TABLE product_fp32_fp64 (x Array(Float32), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_fp32_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_fp32_fp64; -DROP TABLE product_fp32_fp64; - -DROP TABLE IF EXISTS product_uint8_fp64; -CREATE TABLE product_uint8_fp64 (x Array(UInt8), y Array(Float64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint8_fp64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint8_fp64; -DROP TABLE product_uint8_fp64; - -DROP TABLE IF EXISTS product_uint8_uint8; -CREATE TABLE product_uint8_uint8 (x Array(UInt8), y Array(UInt8)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint8_uint8 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint8_uint8; -DROP TABLE 
product_uint8_uint8; - -DROP TABLE IF EXISTS product_uint64_uint64; -CREATE TABLE product_uint64_uint64 (x Array(UInt64), y Array(UInt64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_uint64_uint64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_uint64_uint64; -DROP TABLE product_uint64_uint64; - -DROP TABLE IF EXISTS product_int32_uint64; -CREATE TABLE product_int32_uint64 (x Array(Int32), y Array(UInt64)) engine = MergeTree() order by x; -INSERT INTO TABLE product_int32_uint64 (x, y) values ([1, 2], [3, 4]); -SELECT toTypeName(dotProduct(x, y)) from product_int32_uint64; -DROP TABLE product_int32_uint64; From f928eaf10ca06c7dec855a18c4d58b0f728dabee Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 20 Feb 2024 21:19:30 +0000 Subject: [PATCH 119/145] CI: hot fix for gh statuses #no_merge_commit #job_style_check #job_Stateless_tests_debug --- tests/ci/ci.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 819152fadc3..4d2b124a32c 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1642,13 +1642,7 @@ def main() -> int: if not args.skip_jobs: ci_cache = CiCache(s3, jobs_data["digests"]) - if ( - pr_info.is_release_branch() - or pr_info.event.get("pull_request", {}) - .get("user", {}) - .get("login", "not_maxknv") - == "maxknv" - ): + if pr_info.is_release_branch(): # wait for pending jobs to be finished, await_jobs is a long blocking call # wait pending jobs (for now only on release/master branches) ready_jobs_batches_dict = ci_cache.await_jobs( @@ -1838,7 +1832,7 @@ def main() -> int: pr_info.sha, job_report.test_results, job_report.additional_files, - job_report.check_name or args.job_name, + job_report.check_name or _get_ext_check_name(args.job_name), additional_urls=additional_urls or None, ) commit = get_commit( @@ -1849,7 +1843,7 @@ def main() -> int: job_report.status, check_url, format_description(job_report.description), - job_report.check_name or args.job_name, + job_report.check_name or _get_ext_check_name(args.job_name), pr_info, dump_to_file=True, ) @@ -1867,7 +1861,7 @@ def main() -> int: job_report.duration, job_report.start_time, check_url or "", - job_report.check_name or args.job_name, + job_report.check_name or _get_ext_check_name(args.job_name), ) ch_helper.insert_events_into( db="default", table="checks", events=prepared_events From 7d354164a5c47b762cfac2aea7406cd99fc00fe0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 21:41:10 +0000 Subject: [PATCH 120/145] Add performance test for dotProduct() --- tests/performance/dotProduct.xml | 62 +++++++++++++++++++++++++++++ tests/performance/norm_distance.xml | 6 +-- 2 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 tests/performance/dotProduct.xml diff --git a/tests/performance/dotProduct.xml b/tests/performance/dotProduct.xml new file mode 100644 index 00000000000..6e056964ebb --- /dev/null +++ b/tests/performance/dotProduct.xml @@ -0,0 +1,62 @@ + + + + + element_type + + + + + + + Float32 + Float64 + + + + + + CREATE TABLE vecs_{element_type} ( + v Array({element_type}) + ) ENGINE=Memory; + + + + + + + INSERT INTO vecs_{element_type} + SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), 
rand(n*10+19), + rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) + ] AS v + FROM system.numbers + LIMIT 5000000 + ); + + + + 1 + + + SELECT sum(dp) FROM (SELECT dotProduct(v, v) AS dp FROM vecs_{element_type}) + + DROP TABLE vecs_{element_type} + + diff --git a/tests/performance/norm_distance.xml b/tests/performance/norm_distance.xml index 1e879607dac..69ed71d026f 100644 --- a/tests/performance/norm_distance.xml +++ b/tests/performance/norm_distance.xml @@ -4,11 +4,11 @@ element_type - + - Int32 - Int64 + + Float32 Float64 From e57b8f64e0e8112c063daacba4933145000603de Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 22:16:32 +0000 Subject: [PATCH 121/145] Help the compiler a bit with auto-vectorization For query SELECT sum(dp) FROM (SELECT dotProduct(v, v) AS dp FROM vectors) with vectors of type Array(Float32) and Array(Float64) and dimension = 150, runtimes go down from 0.46 sec to 0.34 sec (Float32) and 0.74 sec to 0.68 sec (Float64)) The latter (only a minor improvement) is independent of the number of lanes (VEC_SIZE = 4 vs. 2 which is the theoretical optimal for Float64). 
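A minimal standalone sketch (not the patch's actual code, which follows in the diff below) of the multi-accumulator idea this commit describes: keeping several independent partial sums removes the loop-carried dependency on a single accumulator, which leaves the compiler free to auto-vectorize the multiply-add loop. Names, VEC_SIZE and the test values are illustrative only.

    #include <cstddef>
    #include <iostream>
    #include <vector>

    template <typename T>
    T dotProduct(const T * x, const T * y, size_t size)
    {
        constexpr size_t VEC_SIZE = 4;   // number of independent accumulators
        T partial[VEC_SIZE] = {};

        size_t i = 0;
        for (; i + VEC_SIZE <= size; i += VEC_SIZE)   // vectorizable main loop
            for (size_t j = 0; j < VEC_SIZE; ++j)
                partial[j] += x[i + j] * y[i + j];

        T sum = 0;
        for (T p : partial)                           // combine the lanes
            sum += p;

        for (; i < size; ++i)                         // scalar tail
            sum += x[i] * y[i];

        return sum;
    }

    int main()
    {
        std::vector<float> a(150, 1.5f), b(150, 2.0f);
        std::cout << dotProduct(a.data(), b.data(), a.size()) << '\n';   // prints 450
    }

The real change wraps the same pattern in a State struct with accumulate()/combine()/finalize(), which is what the measurements quoted above refer to.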
--- src/Functions/array/arrayDotProduct.cpp | 46 ++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 8a63d11de7d..209c5fc1ac5 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -212,16 +212,54 @@ public: return result_type; } + /// Modeled after the implementation of distance functions L1Distance(), L2Distance() etc. + template + struct State + { + Type sum = 0; + + void accumulate(Type x, Type y) + { + sum += x * y; + } + + void combine(const State & other_state) + { + sum += other_state.sum; + } + + Type finalize() + { + return sum; + } + }; + template static NO_SANITIZE_UNDEFINED ResultType apply( const LeftType * left, const RightType * right, size_t size) { - ResultType result = 0; - for (size_t i = 0; i < size; ++i) - result += static_cast(left[i]) * static_cast(right[i]); - return result; + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + State states[VEC_SIZE]; + size_t i = 0; + for (; i + VEC_SIZE < size; i += VEC_SIZE) + { + for (size_t j = 0; j < VEC_SIZE; ++j) + states[j].accumulate(static_cast(left[i + j]), static_cast(right[i + j])); + } + + State state; + for (const auto & other_state : states) + state.combine(other_state); + + /// Process the tail + for (; i < size; ++i) + state.accumulate(static_cast(left[i]), static_cast(right[i])); + + ResultType res = state.finalize(); + return res; } }; From daa61a8576918ef650e314d06f4ece08cbbc74e7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Feb 2024 22:51:11 +0000 Subject: [PATCH 122/145] Manualy vectorize arrayDotProduct() Measurements go down from - 0.34 sec --> 0.30 sec (Float32) - 0.68 sec --> 0.54 sec --- src/Functions/array/arrayDotProduct.cpp | 62 ++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 209c5fc1ac5..2fc162208ae 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -9,6 +9,9 @@ #include #include +#if USE_MULTITARGET_CODE +#include +#endif namespace DB { @@ -234,25 +237,80 @@ public: } }; +#if USE_MULTITARGET_CODE + template + AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine( + const ResultType * __restrict data_x, + const ResultType * __restrict data_y, + size_t i_max, + size_t & i, + State & state) + { + __m512 sums; + if constexpr (std::is_same_v) + sums = _mm512_setzero_ps(); + else + sums = _mm512_setzero_pd(); + + const size_t n = (std::is_same_v) ? 16 : 8; + + for (; i + n < i_max; i += n) + { + if constexpr (std::is_same_v) + { + __m512 x = _mm512_loadu_ps(data_x + i); + __m512 y = _mm512_loadu_ps(data_y + i); + sums = _mm512_fmadd_ps(x, y, sums); + } + else + { + __m512 x = _mm512_loadu_pd(data_x + i); + __m512 y = _mm512_loadu_pd(data_y + i); + sums = _mm512_fmadd_pd(x, y, sums); + } + } + + if constexpr (std::is_same_v) + state.sum = _mm512_reduce_add_ps(sums); + else + state.sum = _mm512_reduce_add_pd(sums); + } +#endif + template static NO_SANITIZE_UNDEFINED ResultType apply( const LeftType * left, const RightType * right, size_t size) { + State state; + size_t i = 0; + + /// SIMD optimization: process multiple elements in both input arrays at once. 
+ /// To avoid combinatorial explosion of SIMD kernels, focus on + /// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x + /// 10 input types x 8 output types, + /// - the most powerful SIMD instruction set (AVX-512F). +#if USE_MULTITARGET_CODE + if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v && std::is_same_v) + { + if (isArchSupported(TargetArch::AVX512F)) + accumulateCombine(left, right, size, i, state); + } +#else /// Process chunks in vectorized manner static constexpr size_t VEC_SIZE = 4; State states[VEC_SIZE]; - size_t i = 0; for (; i + VEC_SIZE < size; i += VEC_SIZE) { for (size_t j = 0; j < VEC_SIZE; ++j) states[j].accumulate(static_cast(left[i + j]), static_cast(right[i + j])); } - State state; for (const auto & other_state : states) state.combine(other_state); +#endif /// Process the tail for (; i < size; ++i) From c0e1f5016843607180612322e1ab81bf361c7c7e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 20 Feb 2024 12:09:48 +0300 Subject: [PATCH 123/145] Copy S3 file GCP fallback to buffer copy --- src/IO/S3/copyS3File.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 98024e74f8e..e9f4a555b05 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -746,7 +746,10 @@ namespace break; } - if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument") + if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || + outcome.GetError().GetExceptionName() == "InvalidRequest" || + outcome.GetError().GetExceptionName() == "InvalidArgument" || + (outcome.GetError().GetExceptionName() == "InternalError" && outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT)) { if (!supports_multipart_copy) { From 25bfcdd21f594b15576652ea1c202a40ae02c800 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 21 Feb 2024 11:40:41 +0300 Subject: [PATCH 124/145] Fixed code review issues --- src/IO/S3/copyS3File.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index e9f4a555b05..51518df268c 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -749,7 +749,9 @@ namespace if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest" || outcome.GetError().GetExceptionName() == "InvalidArgument" || - (outcome.GetError().GetExceptionName() == "InternalError" && outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT)) + (outcome.GetError().GetExceptionName() == "InternalError" && + outcome.GetError().GetResponseCode() == Aws::Http::HttpResponseCode::GATEWAY_TIMEOUT && + outcome.GetError().GetMessage().contains("use the Rewrite method in the JSON API"))) { if (!supports_multipart_copy) { From 8d6088c2e60b0b4179b6cca0d30ab7c768ef749a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 21 Feb 2024 09:48:34 +0100 Subject: [PATCH 125/145] Fix test --- tests/integration/test_backup_restore_s3/configs/s3_settings.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml index adeb61cbe07..61ef7759b57 100644 --- 
a/tests/integration/test_backup_restore_s3/configs/s3_settings.xml +++ b/tests/integration/test_backup_restore_s3/configs/s3_settings.xml @@ -1,5 +1,6 @@ + 0 http://minio1:9001/root/data/backups/multipart/ Float32 and (Float64 x Float64) --> Float64 instead of 10 x + /// 10 input types x 8 output types, + /// - the most powerful SIMD instruction set (AVX-512F). #if USE_MULTITARGET_CODE - template - AVX512_FUNCTION_SPECIFIC_ATTRIBUTE static void accumulateCombine( - const ResultType * __restrict data_x, - const ResultType * __restrict data_y, - size_t i_max, - size_t & i, - State & state) - { - __m512 sums; - if constexpr (std::is_same_v) - sums = _mm512_setzero_ps(); - else - sums = _mm512_setzero_pd(); - - const size_t n = (std::is_same_v) ? 16 : 8; - - for (; i + n < i_max; i += n) - { - if constexpr (std::is_same_v) + if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v && std::is_same_v) { - __m512 x = _mm512_loadu_ps(data_x + i); - __m512 y = _mm512_loadu_ps(data_y + i); - sums = _mm512_fmadd_ps(x, y, sums); + if (isArchSupported(TargetArch::AVX512F)) + Kernel::template accumulateCombine(&left[current_offset], &right[current_offset], array_size, i, state); } - else - { - __m512 x = _mm512_loadu_pd(data_x + i); - __m512 y = _mm512_loadu_pd(data_y + i); - sums = _mm512_fmadd_pd(x, y, sums); - } - } - - if constexpr (std::is_same_v) - state.sum = _mm512_reduce_add_ps(sums); - else - state.sum = _mm512_reduce_add_pd(sums); - } -#endif - - template - static NO_SANITIZE_UNDEFINED ResultType apply( - const LeftType * left, - const RightType * right, - size_t size) - { - State state; - size_t i = 0; - - /// SIMD optimization: process multiple elements in both input arrays at once. - /// To avoid combinatorial explosion of SIMD kernels, focus on - /// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x - /// 10 input types x 8 output types, - /// - the most powerful SIMD instruction set (AVX-512F). 
-#if USE_MULTITARGET_CODE - if constexpr ((std::is_same_v || std::is_same_v) - && std::is_same_v && std::is_same_v) - { - if (isArchSupported(TargetArch::AVX512F)) - accumulateCombine(left, right, size, i, state); - } #else - /// Process chunks in vectorized manner - static constexpr size_t VEC_SIZE = 4; - State states[VEC_SIZE]; - for (; i + VEC_SIZE < size; i += VEC_SIZE) - { - for (size_t j = 0; j < VEC_SIZE; ++j) - states[j].accumulate(static_cast(left[i + j]), static_cast(right[i + j])); - } + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + typename Kernel::template State states[VEC_SIZE]; + for (; i + VEC_SIZE < array_size; i += VEC_SIZE) + { + for (size_t j = 0; j < VEC_SIZE; ++j) + Kernel::template accumulate(states[j], static_cast(left[i + j]), static_cast(right[i + j])); + } - for (const auto & other_state : states) - state.combine(other_state); + for (const auto & other_state : states) + Kernel::template combine(state, other_state); #endif - /// Process the tail - for (; i < size; ++i) - state.accumulate(static_cast(left[i]), static_cast(right[i])); + /// Process the tail + for (; i < array_size; ++i) + Kernel::template accumulate(state, static_cast(left[i]), static_cast(right[i])); - ResultType res = state.finalize(); - return res; + /// ResultType res = Kernel::template finalize(state); + result[row] = Kernel::template finalize(state); + + current_offset = offsets[row]; + } } }; -using FunctionArrayDotProduct = FunctionArrayScalarProduct; +using FunctionArrayDotProduct = FunctionArrayScalarProduct; REGISTER_FUNCTION(ArrayDotProduct) { From 9f51e840db859a617827d6d086cf05f9a78d48f6 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 13 Feb 2024 12:15:46 +0000 Subject: [PATCH 129/145] CI: integration tests to mysql80 #ci_set_integration #no_merge_commit --- .../compose/docker_compose_mysql_cluster.yml | 6 +- tests/ci/ci_config.py | 8 - tests/integration/helpers/cluster.py | 98 +- tests/integration/helpers/external_sources.py | 2 +- .../test_mysql.py | 8 +- .../integration/test_dictionaries_ddl/test.py | 12 +- .../configs/dictionaries/mysql_dict1.xml | 4 +- .../configs/dictionaries/mysql_dict2.xml | 6 +- .../configs/named_collections.xml | 6 +- .../test_dictionaries_mysql/test.py | 16 +- .../test_disabled_mysql_server/test.py | 8 +- .../test_mask_sensitive_info/test.py | 48 +- .../test_materialized_mysql_database/test.py | 10 +- .../test_mysql57_database_engine/__init__.py | 0 .../configs/named_collections.xml | 23 + .../configs/remote_servers.xml | 12 + .../configs/user.xml | 10 + .../configs/users.xml | 9 + .../test_mysql57_database_engine/test.py | 1074 +++++++++++++++++ .../configs/named_collections.xml | 4 +- .../test_mysql_database_engine/test.py | 68 +- .../integration/test_odbc_interaction/test.py | 10 +- .../configs/named_collections.xml | 8 +- tests/integration/test_storage_mysql/test.py | 96 +- 24 files changed, 1334 insertions(+), 212 deletions(-) create mode 100644 tests/integration/test_mysql57_database_engine/__init__.py create mode 100644 tests/integration/test_mysql57_database_engine/configs/named_collections.xml create mode 100644 tests/integration/test_mysql57_database_engine/configs/remote_servers.xml create mode 100644 tests/integration/test_mysql57_database_engine/configs/user.xml create mode 100644 tests/integration/test_mysql57_database_engine/configs/users.xml create mode 100644 tests/integration/test_mysql57_database_engine/test.py diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml 
b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 73f9e39f0d6..079c451b9d6 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -1,7 +1,7 @@ version: '2.3' services: mysql2: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse @@ -23,7 +23,7 @@ services: source: ${MYSQL_CLUSTER_LOGS:-} target: /mysql/ mysql3: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse @@ -45,7 +45,7 @@ services: source: ${MYSQL_CLUSTER_LOGS:-} target: /mysql/ mysql4: - image: mysql:5.7 + image: mysql:8.0 restart: always environment: MYSQL_ROOT_PASSWORD: clickhouse diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 3ebcbb7ed59..f2eaa407c7c 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -637,16 +637,8 @@ CI_CONFIG = CIConfig( Labels.CI_SET_INTEGRATION: LabelConfig( run_jobs=[ JobNames.STYLE_CHECK, - Build.PACKAGE_ASAN, Build.PACKAGE_RELEASE, - Build.PACKAGE_TSAN, - Build.PACKAGE_AARCH64, - JobNames.INTEGRATION_TEST_ASAN, - JobNames.INTEGRATION_TEST_ARM, JobNames.INTEGRATION_TEST, - JobNames.INTEGRATION_TEST_ASAN_ANALYZER, - JobNames.INTEGRATION_TEST_TSAN, - JobNames.INTEGRATION_TEST_FLAKY, ] ), Labels.CI_SET_REDUCED: LabelConfig( diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 1d96563251b..542f757ddd4 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -465,7 +465,7 @@ class ClickHouseCluster: self.base_cmd += ["--project-name", self.project_name] self.base_zookeeper_cmd = None - self.base_mysql_cmd = [] + self.base_mysql57_cmd = [] self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] self.base_kerberos_kdc_cmd = [] @@ -479,7 +479,7 @@ class ClickHouseCluster: self.with_zookeeper = False self.with_zookeeper_secure = False self.with_mysql_client = False - self.with_mysql = False + self.with_mysql57 = False self.with_mysql8 = False self.with_mysql_cluster = False self.with_postgres = False @@ -644,12 +644,19 @@ class ClickHouseCluster: self.mysql_client_host = "mysql_client" self.mysql_client_container = None - # available when with_mysql == True - self.mysql_host = "mysql57" - self.mysql_port = 3306 - self.mysql_ip = None - self.mysql_dir = p.abspath(p.join(self.instances_dir, "mysql")) - self.mysql_logs_dir = os.path.join(self.mysql_dir, "logs") + # available when with_mysql57 == True + self.mysql57_host = "mysql57" + self.mysql57_port = 3306 + self.mysql57_ip = None + self.mysql57_dir = p.abspath(p.join(self.instances_dir, "mysql")) + self.mysql57_logs_dir = os.path.join(self.mysql57_dir, "logs") + + # available when with_mysql8 == True + self.mysql8_host = "mysql80" + self.mysql8_port = 3306 + self.mysql8_ip = None + self.mysql8_dir = p.abspath(p.join(self.instances_dir, "mysql8")) + self.mysql8_logs_dir = os.path.join(self.mysql8_dir, "logs") # available when with_mysql_cluster == True self.mysql2_host = "mysql2" @@ -659,14 +666,7 @@ class ClickHouseCluster: self.mysql3_ip = None self.mysql4_ip = None self.mysql_cluster_dir = p.abspath(p.join(self.instances_dir, "mysql")) - self.mysql_cluster_logs_dir = os.path.join(self.mysql_dir, "logs") - - # available when with_mysql8 == True - self.mysql8_host = "mysql80" - self.mysql8_port = 3306 - self.mysql8_ip = None - self.mysql8_dir = p.abspath(p.join(self.instances_dir, "mysql8")) - self.mysql8_logs_dir = 
os.path.join(self.mysql8_dir, "logs") + self.mysql_cluster_logs_dir = os.path.join(self.mysql8_dir, "logs") # available when with_zookeper_secure == True self.zookeeper_secure_port = 2281 @@ -1045,17 +1045,17 @@ class ClickHouseCluster: return self.base_mysql_client_cmd - def setup_mysql_cmd(self, instance, env_variables, docker_compose_yml_dir): - self.with_mysql = True - env_variables["MYSQL_HOST"] = self.mysql_host - env_variables["MYSQL_PORT"] = str(self.mysql_port) + def setup_mysql57_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_mysql57 = True + env_variables["MYSQL_HOST"] = self.mysql57_host + env_variables["MYSQL_PORT"] = str(self.mysql57_port) env_variables["MYSQL_ROOT_HOST"] = "%" - env_variables["MYSQL_LOGS"] = self.mysql_logs_dir + env_variables["MYSQL_LOGS"] = self.mysql57_logs_dir env_variables["MYSQL_LOGS_FS"] = "bind" self.base_cmd.extend( ["--file", p.join(docker_compose_yml_dir, "docker_compose_mysql.yml")] ) - self.base_mysql_cmd = [ + self.base_mysql57_cmd = [ "docker-compose", "--env-file", instance.env_file, @@ -1065,7 +1065,7 @@ class ClickHouseCluster: p.join(docker_compose_yml_dir, "docker_compose_mysql.yml"), ] - return self.base_mysql_cmd + return self.base_mysql57_cmd def setup_mysql8_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_mysql8 = True @@ -1091,7 +1091,7 @@ class ClickHouseCluster: def setup_mysql_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_mysql_cluster = True - env_variables["MYSQL_CLUSTER_PORT"] = str(self.mysql_port) + env_variables["MYSQL_CLUSTER_PORT"] = str(self.mysql8_port) env_variables["MYSQL_CLUSTER_ROOT_HOST"] = "%" env_variables["MYSQL_CLUSTER_LOGS"] = self.mysql_cluster_logs_dir env_variables["MYSQL_CLUSTER_LOGS_FS"] = "bind" @@ -1572,7 +1572,7 @@ class ClickHouseCluster: with_zookeeper=False, with_zookeeper_secure=False, with_mysql_client=False, - with_mysql=False, + with_mysql57=False, with_mysql8=False, with_mysql_cluster=False, with_kafka=False, @@ -1676,7 +1676,7 @@ class ClickHouseCluster: with_zookeeper=with_zookeeper, zookeeper_config_path=self.zookeeper_config_path, with_mysql_client=with_mysql_client, - with_mysql=with_mysql, + with_mysql57=with_mysql57, with_mysql8=with_mysql8, with_mysql_cluster=with_mysql_cluster, with_kafka=with_kafka, @@ -1767,9 +1767,9 @@ class ClickHouseCluster: ) ) - if with_mysql and not self.with_mysql: + if with_mysql57 and not self.with_mysql57: cmds.append( - self.setup_mysql_cmd(instance, env_variables, docker_compose_yml_dir) + self.setup_mysql57_cmd(instance, env_variables, docker_compose_yml_dir) ) if with_mysql8 and not self.with_mysql8: @@ -1805,9 +1805,9 @@ class ClickHouseCluster: if with_odbc_drivers and not self.with_odbc_drivers: self.with_odbc_drivers = True - if not self.with_mysql: + if not self.with_mysql8: cmds.append( - self.setup_mysql_cmd( + self.setup_mysql8_cmd( instance, env_variables, docker_compose_yml_dir ) ) @@ -2148,8 +2148,8 @@ class ClickHouseCluster: logging.error("Can't connect to MySQL Client:{}".format(errors)) raise Exception("Cannot wait MySQL Client container") - def wait_mysql_to_start(self, timeout=180): - self.mysql_ip = self.get_instance_ip("mysql57") + def wait_mysql57_to_start(self, timeout=180): + self.mysql57_ip = self.get_instance_ip("mysql57") start = time.time() errors = [] while time.time() - start < timeout: @@ -2157,8 +2157,8 @@ class ClickHouseCluster: conn = pymysql.connect( user=mysql_user, password=mysql_pass, - host=self.mysql_ip, - port=self.mysql_port, + 
host=self.mysql57_ip, + port=self.mysql57_port, ) conn.close() logging.debug("Mysql Started") @@ -2205,7 +2205,7 @@ class ClickHouseCluster: user=mysql_user, password=mysql_pass, host=ip, - port=self.mysql_port, + port=self.mysql8_port, ) conn.close() logging.debug(f"Mysql Started {ip}") @@ -2752,15 +2752,15 @@ class ClickHouseCluster: subprocess_check_call(self.base_mysql_client_cmd + common_opts) self.wait_mysql_client_to_start() - if self.with_mysql and self.base_mysql_cmd: + if self.with_mysql57 and self.base_mysql57_cmd: logging.debug("Setup MySQL") - if os.path.exists(self.mysql_dir): - shutil.rmtree(self.mysql_dir) - os.makedirs(self.mysql_logs_dir) - os.chmod(self.mysql_logs_dir, stat.S_IRWXU | stat.S_IRWXO) - subprocess_check_call(self.base_mysql_cmd + common_opts) + if os.path.exists(self.mysql57_dir): + shutil.rmtree(self.mysql57_dir) + os.makedirs(self.mysql57_logs_dir) + os.chmod(self.mysql57_logs_dir, stat.S_IRWXU | stat.S_IRWXO) + subprocess_check_call(self.base_mysql57_cmd + common_opts) self.up_called = True - self.wait_mysql_to_start() + self.wait_mysql57_to_start() if self.with_mysql8 and self.base_mysql8_cmd: logging.debug("Setup MySQL 8") @@ -2775,7 +2775,7 @@ class ClickHouseCluster: print("Setup MySQL") if os.path.exists(self.mysql_cluster_dir): shutil.rmtree(self.mysql_cluster_dir) - os.makedirs(self.mysql_cluster_logs_dir) + os.makedirs(self.mysql_cluster_logs_dir, exist_ok=True) os.chmod(self.mysql_cluster_logs_dir, stat.S_IRWXU | stat.S_IRWXO) subprocess_check_call(self.base_mysql_cluster_cmd + common_opts) @@ -3239,7 +3239,7 @@ class ClickHouseInstance: with_zookeeper, zookeeper_config_path, with_mysql_client, - with_mysql, + with_mysql57, with_mysql8, with_mysql_cluster, with_kafka, @@ -3324,7 +3324,7 @@ class ClickHouseInstance: self.library_bridge_bin_path = library_bridge_bin_path self.with_mysql_client = with_mysql_client - self.with_mysql = with_mysql + self.with_mysql57 = with_mysql57 self.with_mysql8 = with_mysql8 self.with_mysql_cluster = with_mysql_cluster self.with_postgres = with_postgres @@ -3368,7 +3368,7 @@ class ClickHouseInstance: self.env_file = self.cluster.env_file if with_odbc_drivers: self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini" - self.with_mysql = True + self.with_mysql8 = True else: self.odbc_ini_path = "" @@ -4294,7 +4294,7 @@ class ClickHouseInstance: "Database": odbc_mysql_db, "Uid": odbc_mysql_uid, "Pwd": odbc_mysql_pass, - "Server": self.cluster.mysql_host, + "Server": self.cluster.mysql8_host, }, "PostgreSQL": { "DSN": "postgresql_odbc", @@ -4482,14 +4482,14 @@ class ClickHouseInstance: if self.with_mysql_client: depends_on.append(self.cluster.mysql_client_host) - if self.with_mysql: + if self.with_mysql57: depends_on.append("mysql57") if self.with_mysql8: depends_on.append("mysql80") if self.with_mysql_cluster: - depends_on.append("mysql57") + depends_on.append("mysql80") depends_on.append("mysql2") depends_on.append("mysql3") depends_on.append("mysql4") diff --git a/tests/integration/helpers/external_sources.py b/tests/integration/helpers/external_sources.py index cccf151e73e..033a2f84fa2 100644 --- a/tests/integration/helpers/external_sources.py +++ b/tests/integration/helpers/external_sources.py @@ -119,7 +119,7 @@ class SourceMySQL(ExternalSource): def prepare(self, structure, table_name, cluster): if self.internal_hostname is None: - self.internal_hostname = cluster.mysql_ip + self.internal_hostname = cluster.mysql8_ip self.create_mysql_conn() self.execute_mysql_query( "create database if not exists test default 
character set 'utf8'" diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py index 77b2c0741b5..afae8d616b0 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/test_mysql.py @@ -29,9 +29,9 @@ def setup_module(module): SOURCE = SourceMySQL( "MySQL", None, - cluster.mysql_port, - cluster.mysql_host, - cluster.mysql_port, + cluster.mysql8_port, + cluster.mysql8_host, + cluster.mysql8_port, "root", "clickhouse", ) @@ -53,7 +53,7 @@ def setup_module(module): dictionaries = simple_tester.list_dictionaries() node = cluster.add_instance( - "node", main_configs=main_configs, dictionaries=dictionaries, with_mysql=True + "node", main_configs=main_configs, dictionaries=dictionaries, with_mysql8=True ) diff --git a/tests/integration/test_dictionaries_ddl/test.py b/tests/integration/test_dictionaries_ddl/test.py index 7dda6fc245a..2f31e406735 100644 --- a/tests/integration/test_dictionaries_ddl/test.py +++ b/tests/integration/test_dictionaries_ddl/test.py @@ -13,7 +13,7 @@ SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", - with_mysql=True, + with_mysql8=True, dictionaries=["configs/dictionaries/simple_dictionary.xml"], main_configs=[ "configs/ssl_conf.xml", @@ -26,7 +26,7 @@ node1 = cluster.add_instance( ) node2 = cluster.add_instance( "node2", - with_mysql=True, + with_mysql8=True, dictionaries=["configs/dictionaries/simple_dictionary.xml"], main_configs=[ "configs/dictionaries/lazy_load.xml", @@ -117,7 +117,7 @@ def started_cluster(): ) def test_create_and_select_mysql(started_cluster, clickhouse, name, layout): mysql_conn = create_mysql_conn( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) execute_mysql_query(mysql_conn, "DROP DATABASE IF EXISTS create_and_select") execute_mysql_query(mysql_conn, "CREATE DATABASE create_and_select") @@ -152,7 +152,7 @@ def test_create_and_select_mysql(started_cluster, clickhouse, name, layout): DB 'create_and_select' TABLE '{}' REPLICA(PRIORITY 1 HOST '127.0.0.1' PORT 3333) - REPLICA(PRIORITY 2 HOST 'mysql57' PORT 3306) + REPLICA(PRIORITY 2 HOST 'mysql80' PORT 3306) )) {} LIFETIME(MIN 1 MAX 3) @@ -367,7 +367,7 @@ def test_file_dictionary_restrictions(started_cluster): def test_dictionary_with_where(started_cluster): mysql_conn = create_mysql_conn( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) execute_mysql_query( mysql_conn, "CREATE DATABASE IF NOT EXISTS dictionary_with_where" @@ -393,7 +393,7 @@ def test_dictionary_with_where(started_cluster): PASSWORD 'clickhouse' DB 'dictionary_with_where' TABLE 'special_table' - REPLICA(PRIORITY 1 HOST 'mysql57' PORT 3306) + REPLICA(PRIORITY 1 HOST 'mysql80' PORT 3306) WHERE 'value1 = \\'qweqwe\\' OR value1 = \\'\\\\u3232\\'' )) LAYOUT(FLAT()) diff --git a/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml b/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml index d4d2466b7bf..737c4e0e54f 100644 --- a/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml +++ 
b/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict1.xml @@ -4,7 +4,7 @@ test - mysql57 + mysql80 3306 root clickhouse @@ -40,7 +40,7 @@ test - mysql57 + mysql80 3306 root clickhouse diff --git a/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml b/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml index 031afbba636..ca65dba44e6 100644 --- a/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml +++ b/tests/integration/test_dictionaries_mysql/configs/dictionaries/mysql_dict2.xml @@ -4,7 +4,7 @@ test - mysql57 + mysql80 3306 root clickhouse @@ -41,7 +41,7 @@ test - mysql57 + mysql80 3306 root clickhouse @@ -77,7 +77,7 @@ test - mysql57 + mysql80 3306 root clickhouse diff --git a/tests/integration/test_dictionaries_mysql/configs/named_collections.xml b/tests/integration/test_dictionaries_mysql/configs/named_collections.xml index 6e4098c4e4a..0f1a06d7812 100644 --- a/tests/integration/test_dictionaries_mysql/configs/named_collections.xml +++ b/tests/integration/test_dictionaries_mysql/configs/named_collections.xml @@ -3,7 +3,7 @@ root clickhouse - mysql57 + mysql80 3306 test test_table
@@ -16,7 +16,7 @@ root clickhouse - mysql57 + mysql80 1111 test test_table
@@ -24,7 +24,7 @@ root clickhouse - mysql57 + mysql80 3306 test test_table
diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index ee0d957b8a9..360456b2046 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -14,7 +14,7 @@ instance = cluster.add_instance( "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, - with_mysql=True, + with_mysql8=True, dictionaries=DICTS, ) @@ -47,7 +47,7 @@ def started_cluster(): # Create database in ClickChouse using MySQL protocol (will be used for data insertion) instance.query( - "CREATE DATABASE clickhouse_mysql ENGINE = MySQL('mysql57:3306', 'test', 'root', 'clickhouse')" + "CREATE DATABASE clickhouse_mysql ENGINE = MySQL('mysql80:3306', 'test', 'root', 'clickhouse')" ) yield cluster @@ -86,7 +86,7 @@ def test_mysql_dictionaries_custom_query_full_load(started_cluster): PRIMARY KEY id LAYOUT(FLAT()) SOURCE(MYSQL( - HOST 'mysql57' + HOST 'mysql80' PORT 3306 USER 'root' PASSWORD 'clickhouse' @@ -135,7 +135,7 @@ def test_mysql_dictionaries_custom_query_partial_load_simple_key(started_cluster PRIMARY KEY id LAYOUT(DIRECT()) SOURCE(MYSQL( - HOST 'mysql57' + HOST 'mysql80' PORT 3306 USER 'root' PASSWORD 'clickhouse' @@ -186,7 +186,7 @@ def test_mysql_dictionaries_custom_query_partial_load_complex_key(started_cluste PRIMARY KEY id, id_key LAYOUT(COMPLEX_KEY_DIRECT()) SOURCE(MYSQL( - HOST 'mysql57' + HOST 'mysql80' PORT 3306 USER 'root' PASSWORD 'clickhouse' @@ -372,13 +372,13 @@ def get_mysql_conn(started_cluster): conn = pymysql.connect( user="root", password="clickhouse", - host=started_cluster.mysql_ip, - port=started_cluster.mysql_port, + host=started_cluster.mysql8_ip, + port=started_cluster.mysql8_port, ) else: conn.ping(reconnect=True) logging.debug( - f"MySQL Connection establised: {started_cluster.mysql_ip}:{started_cluster.mysql_port}" + f"MySQL Connection establised: {started_cluster.mysql8_ip}:{started_cluster.mysql8_port}" ) return conn except Exception as e: diff --git a/tests/integration/test_disabled_mysql_server/test.py b/tests/integration/test_disabled_mysql_server/test.py index 6a4df3fc0b4..814aebb0d8e 100644 --- a/tests/integration/test_disabled_mysql_server/test.py +++ b/tests/integration/test_disabled_mysql_server/test.py @@ -11,7 +11,7 @@ from helpers.network import PartitionManager cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance( - "node1", main_configs=["configs/remote_servers.xml"], with_mysql=True + "node1", main_configs=["configs/remote_servers.xml"], with_mysql8=True ) @@ -27,8 +27,8 @@ def started_cluster(): class MySQLNodeInstance: def __init__(self, started_cluster, user="root", password="clickhouse"): self.user = user - self.port = cluster.mysql_port - self.hostname = cluster.mysql_ip + self.port = cluster.mysql8_port + self.hostname = cluster.mysql8_ip self.password = password self.mysql_connection = None # lazy init @@ -62,7 +62,7 @@ def test_disabled_mysql_server(started_cluster): with PartitionManager() as pm: clickhouse_node.query( - "CREATE DATABASE test_db_disabled ENGINE = MySQL('mysql57:3306', 'test_db_disabled', 'root', 'clickhouse')" + "CREATE DATABASE test_db_disabled ENGINE = MySQL('mysql80:3306', 'test_db_disabled', 'root', 'clickhouse')" ) pm._add_rule( diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index ec34c181371..736f1b3cf71 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py 
@@ -175,7 +175,7 @@ def test_create_table(): password = new_password() table_engines = [ - f"MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", + f"MySQL('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", f"PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')", f"MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}')", f"S3('http://minio1:9001/root/data/test1.csv')", @@ -183,9 +183,9 @@ def test_create_table(): f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')", f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')", - f"MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", - f"MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '{password}', table = 'mysql_table', user = 'mysql_user')", - f"MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + f"MySQL(named_collection_1, host = 'mysql80', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"MySQL(named_collection_2, database = 'mysql_db', host = 'mysql80', port = 3306, password = '{password}', table = 'mysql_table', user = 'mysql_user')", + f"MySQL(named_collection_3, database = 'mysql_db', host = 'mysql80', port = 3306, table = 'mysql_table')", f"PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '{password}')", f"MongoDB(named_collection_5, host = 'mongo1', port = 5432, db = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '{password}')", f"S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '{password}', format = 'CSV')", @@ -219,7 +219,7 @@ def test_create_table(): assert ( node.query(f"SHOW CREATE TABLE table0 {show_secrets}={toggle}") == "CREATE TABLE default.table0\\n(\\n `x` Int32\\n)\\n" - "ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', " + "ENGINE = MySQL(\\'mysql80:3306\\', \\'mysql_db\\', " f"\\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')\n" ) @@ -228,16 +228,16 @@ def test_create_table(): ) == TSV( [ [ - "CREATE TABLE default.table0 (`x` Int32) ENGINE = MySQL(\\'mysql57:3306\\', \\'mysql_db\\', " + "CREATE TABLE default.table0 (`x` Int32) ENGINE = MySQL(\\'mysql80:3306\\', \\'mysql_db\\', " f"\\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", - f"MySQL(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", + f"MySQL(\\'mysql80:3306\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", ], ] ) check_logs( must_contain=[ - "CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + "CREATE TABLE table0 (`x` int) ENGINE = MySQL('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", "CREATE TABLE table1 (`x` int) ENGINE = PostgreSQL('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')", "CREATE TABLE table2 (`x` int) ENGINE = MongoDB('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]')", "CREATE TABLE table3 (x int) ENGINE = S3('http://minio1:9001/root/data/test1.csv')", @@ -245,9 +245,9 
@@ def test_create_table(): "CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", "CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')", "CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')", - "CREATE TABLE table8 (`x` int) ENGINE = MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", - "CREATE TABLE table9 (`x` int) ENGINE = MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '[HIDDEN]', table = 'mysql_table', user = 'mysql_user')", - "CREATE TABLE table10 (x int) ENGINE = MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + "CREATE TABLE table8 (`x` int) ENGINE = MySQL(named_collection_1, host = 'mysql80', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE table9 (`x` int) ENGINE = MySQL(named_collection_2, database = 'mysql_db', host = 'mysql80', port = 3306, password = '[HIDDEN]', table = 'mysql_table', user = 'mysql_user')", + "CREATE TABLE table10 (x int) ENGINE = MySQL(named_collection_3, database = 'mysql_db', host = 'mysql80', port = 3306, table = 'mysql_table')", "CREATE TABLE table11 (`x` int) ENGINE = PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '[HIDDEN]')", "CREATE TABLE table12 (`x` int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', port = 5432, db = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '[HIDDEN]'", "CREATE TABLE table13 (`x` int) ENGINE = S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]', format = 'CSV')", @@ -320,7 +320,7 @@ def test_table_functions(): password = new_password() table_functions = [ - f"mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", + f"mysql('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')", f"postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '{password}')", f"mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '{password}', 'x int')", f"s3('http://minio1:9001/root/data/test1.csv')", @@ -345,7 +345,7 @@ def test_table_functions(): f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())", - f"mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"mysql(named_collection_1, host = 'mysql80', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", f"postgresql(named_collection_2, password = '{password}', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", f"s3(named_collection_2, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '{password}')", f"remote(named_collection_6, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', 
password = '{password}', sharding_key = rand())", @@ -377,7 +377,7 @@ def test_table_functions(): assert ( node.query(f"SHOW CREATE TABLE tablefunc0 {show_secrets}={toggle}") == "CREATE TABLE default.tablefunc0\\n(\\n `x` Int32\\n) AS " - "mysql(\\'mysql57:3306\\', \\'mysql_db\\', \\'mysql_table\\', " + "mysql(\\'mysql80:3306\\', \\'mysql_db\\', \\'mysql_table\\', " f"\\'mysql_user\\', \\'{secret}\\')\n" ) @@ -387,7 +387,7 @@ def test_table_functions(): ) == TSV( [ [ - "CREATE TABLE default.tablefunc0 (`x` Int32) AS mysql(\\'mysql57:3306\\', " + "CREATE TABLE default.tablefunc0 (`x` Int32) AS mysql(\\'mysql80:3306\\', " f"\\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'{secret}\\')", "", ], @@ -396,7 +396,7 @@ def test_table_functions(): check_logs( must_contain=[ - "CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql57:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + "CREATE TABLE tablefunc0 (`x` int) AS mysql('mysql80:3306', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", "CREATE TABLE tablefunc1 (`x` int) AS postgresql('postgres1:5432', 'postgres_db', 'postgres_table', 'postgres_user', '[HIDDEN]')", "CREATE TABLE tablefunc2 (`x` int) AS mongodb('mongo1:27017', 'mongo_db', 'mongo_col', 'mongo_user', '[HIDDEN]', 'x int')", "CREATE TABLE tablefunc3 (x int) AS s3('http://minio1:9001/root/data/test1.csv')", @@ -421,7 +421,7 @@ def test_table_functions(): "CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())", "CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')", "CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())", - "CREATE TABLE tablefunc25 (`x` int) AS mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE tablefunc25 (`x` int) AS mysql(named_collection_1, host = 'mysql80', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", "CREATE TABLE tablefunc26 (`x` int) AS postgresql(named_collection_2, password = '[HIDDEN]', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", "CREATE TABLE tablefunc27 (`x` int) AS s3(named_collection_2, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]')", "CREATE TABLE tablefunc28 (`x` int) AS remote(named_collection_6, addresses_expr = '127.{2..11}', database = 'default', table = 'remote_table', user = 'remote_user', password = '[HIDDEN]', sharding_key = rand())", @@ -567,31 +567,31 @@ def test_on_cluster(): password = new_password() node.query( - f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')" + f"CREATE TABLE table_oncl ON CLUSTER 'test_shard_localhost' (x int) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '{password}')" ) check_logs( must_contain=[ - "CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", + "CREATE TABLE table_oncl ON CLUSTER test_shard_localhost (`x` int) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '[HIDDEN]')", ], must_not_contain=[password], ) # Check logs of DDLWorker during executing of this query. 
assert node.contains_in_log( - "DDLWorker: Processing task .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + "DDLWorker: Processing task .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" ) assert node.contains_in_log( - "DDLWorker: Executing query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + "DDLWorker: Executing query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" ) assert node.contains_in_log( - "executeQuery: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + "executeQuery: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" ) assert node.contains_in_log( - "DDLWorker: Executed query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql57:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" + "DDLWorker: Executed query: .*CREATE TABLE default\\.table_oncl UUID '[0-9a-fA-F-]*' (\\`x\\` Int32) ENGINE = MySQL('mysql80:3307', 'mysql_db', 'mysql_table', 'mysql_user', '\\[HIDDEN\\]')" ) assert system_query_log_contains_search_pattern( - "%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql57:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')" + "%CREATE TABLE default.table_oncl UUID \\'%\\' (`x` Int32) ENGINE = MySQL(\\'mysql80:3307\\', \\'mysql_db\\', \\'mysql_table\\', \\'mysql_user\\', \\'[HIDDEN]\\')" ) node.query("DROP TABLE table_oncl") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 727188a4b86..89c69c42adc 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -21,7 +21,7 @@ node_db = cluster.add_instance( "node1", main_configs=["configs/timezone_config.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql57=True, with_mysql8=True, stay_alive=True, ) @@ -29,14 +29,16 @@ node_disable_bytes_settings = cluster.add_instance( "node2", main_configs=["configs/timezone_config.xml"], user_configs=["configs/users_disable_bytes_settings.xml"], - with_mysql=False, + with_mysql57=False, + with_mysql8=False, stay_alive=True, ) node_disable_rows_settings = cluster.add_instance( "node3", main_configs=["configs/timezone_config.xml"], user_configs=["configs/users_disable_rows_settings.xml"], - with_mysql=False, + with_mysql57=False, + with_mysql8=False, stay_alive=True, ) @@ -125,7 +127,7 @@ class MySQLConnection: @pytest.fixture(scope="module") def started_mysql_5_7(): mysql_node = MySQLConnection( - cluster.mysql_port, "root", "clickhouse", cluster.mysql_ip + cluster.mysql57_port, "root", "clickhouse", cluster.mysql57_ip ) yield mysql_node diff --git a/tests/integration/test_mysql57_database_engine/__init__.py b/tests/integration/test_mysql57_database_engine/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff 
--git a/tests/integration/test_mysql57_database_engine/configs/named_collections.xml b/tests/integration/test_mysql57_database_engine/configs/named_collections.xml new file mode 100644 index 00000000000..3b65536f20f --- /dev/null +++ b/tests/integration/test_mysql57_database_engine/configs/named_collections.xml @@ -0,0 +1,23 @@ + + + + root + clickhouse + mysql57 + 3306 + test_database + + + postgres + mysecretpassword + postgres1 + + + root + clickhouse + mysql57 + 1111 + clickhouse + + + diff --git a/tests/integration/test_mysql57_database_engine/configs/remote_servers.xml b/tests/integration/test_mysql57_database_engine/configs/remote_servers.xml new file mode 100644 index 00000000000..9c7f02c190f --- /dev/null +++ b/tests/integration/test_mysql57_database_engine/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node1 + 9000 + + + + + diff --git a/tests/integration/test_mysql57_database_engine/configs/user.xml b/tests/integration/test_mysql57_database_engine/configs/user.xml new file mode 100644 index 00000000000..775c63350b0 --- /dev/null +++ b/tests/integration/test_mysql57_database_engine/configs/user.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_mysql57_database_engine/configs/users.xml b/tests/integration/test_mysql57_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mysql57_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mysql57_database_engine/test.py b/tests/integration/test_mysql57_database_engine/test.py new file mode 100644 index 00000000000..a5a13a88b1b --- /dev/null +++ b/tests/integration/test_mysql57_database_engine/test.py @@ -0,0 +1,1074 @@ +import contextlib +import time +from string import Template + +import pymysql.cursors +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) +clickhouse_node = cluster.add_instance( + "node1", + main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_mysql57=True, + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +class MySQLNodeInstance: + def __init__(self, user, password, hostname, port): + self.user = user + self.port = port + self.hostname = hostname + self.password = password + self.mysql_connection = None # lazy init + self.ip_address = hostname + + def query(self, execution_query): + if self.mysql_connection is None: + self.mysql_connection = pymysql.connect( + user=self.user, + password=self.password, + host=self.hostname, + port=self.port, + ) + with self.mysql_connection.cursor() as cursor: + + def execute(query): + res = cursor.execute(query) + if query.lstrip().lower().startswith(("select", "show")): + # Mimic output of the ClickHouseInstance, which is: + # tab-separated values and newline (\n)-separated rows.
+ rows = [] + for row in cursor.fetchall(): + rows.append("\t".join(str(item) for item in row)) + res = "\n".join(rows) + return res + + if isinstance(execution_query, (str, bytes)): + return execute(execution_query) + else: + return [execute(q) for q in execution_query] + + def close(self): + if self.mysql_connection is not None: + self.mysql_connection.close() + + +def test_mysql_ddl_for_mysql_database(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + ) + assert "test_database" in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_database") + + time.sleep( + 3 + ) # Because the unit of MySQL modification time is seconds, modifications made in the same second cannot be obtained + mysql_node.query( + "ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11)" + ) + assert "add_column" in clickhouse_node.query( + "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'test_database'" + ) + + time.sleep( + 3 + ) # Because the unit of MySQL modification time is seconds, modifications made in the same second cannot be obtained + mysql_node.query( + "ALTER TABLE `test_database`.`test_table` DROP COLUMN `add_column`" + ) + assert "add_column" not in clickhouse_node.query( + "SELECT name FROM system.columns WHERE table = 'test_table' AND database = 'test_database'" + ) + + mysql_node.query("DROP TABLE `test_database`.`test_table`;") + assert "test_table" not in clickhouse_node.query( + "SHOW TABLES FROM test_database" + ) + + clickhouse_node.query("DROP DATABASE test_database") + assert "test_database" not in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query("DROP DATABASE test_database") + + +def test_clickhouse_ddl_for_mysql_database(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + ) + + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_database") + clickhouse_node.query("DROP TABLE test_database.test_table") + assert "test_table" not in clickhouse_node.query( + "SHOW TABLES FROM test_database" + ) + clickhouse_node.query("ATTACH TABLE test_database.test_table") + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_database") + clickhouse_node.query("DETACH TABLE test_database.test_table") + assert "test_table" not in clickhouse_node.query( + "SHOW TABLES FROM test_database" + ) + 
clickhouse_node.query("ATTACH TABLE test_database.test_table") + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_database") + + clickhouse_node.query("DROP DATABASE test_database") + assert "test_database" not in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query("DROP DATABASE test_database") + + +def test_clickhouse_dml_for_mysql_database(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `i``d` int(11) NOT NULL, PRIMARY KEY (`i``d`)) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test_database, 'root', 'clickhouse')" + ) + + assert ( + clickhouse_node.query( + "SELECT count() FROM `test_database`.`test_table`" + ).rstrip() + == "0" + ) + clickhouse_node.query( + "INSERT INTO `test_database`.`test_table`(`i``d`) select number from numbers(10000)" + ) + assert ( + clickhouse_node.query( + "SELECT count() FROM `test_database`.`test_table`" + ).rstrip() + == "10000" + ) + + clickhouse_node.query("DROP DATABASE test_database") + assert "test_database" not in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query("DROP DATABASE test_database") + + +def test_clickhouse_join_for_mysql_database(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test") + mysql_node.query("CREATE DATABASE test DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE test.t1_mysql_local (" + "pays VARCHAR(55) DEFAULT 'FRA' NOT NULL," + "service VARCHAR(5) DEFAULT '' NOT NULL," + "opco CHAR(3) DEFAULT '' NOT NULL" + ")" + ) + mysql_node.query( + "CREATE TABLE test.t2_mysql_local (" + "service VARCHAR(5) DEFAULT '' NOT NULL," + "opco VARCHAR(5) DEFAULT ''" + ")" + ) + clickhouse_node.query("DROP TABLE IF EXISTS default.t1_remote_mysql SYNC") + clickhouse_node.query("DROP TABLE IF EXISTS default.t2_remote_mysql SYNC") + clickhouse_node.query( + "CREATE TABLE default.t1_remote_mysql AS mysql('mysql57:3306','test','t1_mysql_local','root','clickhouse')" + ) + clickhouse_node.query( + "CREATE TABLE default.t2_remote_mysql AS mysql('mysql57:3306','test','t2_mysql_local','root','clickhouse')" + ) + clickhouse_node.query( + "INSERT INTO `default`.`t1_remote_mysql` VALUES ('EN','A',''),('RU','B','AAA')" + ) + clickhouse_node.query( + "INSERT INTO `default`.`t2_remote_mysql` VALUES ('A','AAA'),('Z','')" + ) + + assert ( + clickhouse_node.query( + "SELECT s.pays " + "FROM default.t1_remote_mysql AS s " + "LEFT JOIN default.t1_remote_mysql AS s_ref " + "ON (s_ref.opco = s.opco AND s_ref.service = s.service) " + "WHERE s_ref.opco != '' AND s.opco != '' " + ).rstrip() + == "RU" + ) + mysql_node.query("DROP DATABASE test") + + +def test_bad_arguments_for_mysql_database_engine(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + port=started_cluster.mysql57_port, + ) + ) as mysql_node: + with pytest.raises(QueryRuntimeException) as exception: + mysql_node.query( + "CREATE DATABASE IF NOT EXISTS 
test_bad_arguments DEFAULT CHARACTER SET 'utf8'" + ) + clickhouse_node.query( + "CREATE DATABASE test_database_bad_arguments ENGINE = MySQL('mysql57:3306', test_bad_arguments, root, 'clickhouse')" + ) + assert "Database engine MySQL requested literal argument." in str( + exception.value + ) + mysql_node.query("DROP DATABASE test_bad_arguments") + + +def test_column_comments_for_mysql_database_engine(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + ) + assert "test_database" in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`), `test` int COMMENT 'test comment') ENGINE=InnoDB;" + ) + assert "test comment" in clickhouse_node.query( + "DESCRIBE TABLE `test_database`.`test_table`" + ) + + time.sleep( + 3 + ) # Because the unit of MySQL modification time is seconds, modifications made in the same second cannot be obtained + mysql_node.query( + "ALTER TABLE `test_database`.`test_table` ADD COLUMN `add_column` int(11) COMMENT 'add_column comment'" + ) + assert "add_column comment" in clickhouse_node.query( + "SELECT comment FROM system.columns WHERE table = 'test_table' AND database = 'test_database'" + ) + + clickhouse_node.query("DROP DATABASE test_database") + mysql_node.query("DROP DATABASE test_database") + + +def test_data_types_support_level_for_mysql_database_engine(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test") + mysql_node.query( + "CREATE DATABASE IF NOT EXISTS test DEFAULT CHARACTER SET 'utf8'" + ) + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test, 'root', 'clickhouse')", + settings={"mysql_datatypes_support_level": "decimal,datetime64"}, + ) + + assert ( + "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" + in clickhouse_node.query("SHOW CREATE DATABASE test_database FORMAT TSV") + ) + clickhouse_node.query("DETACH DATABASE test_database") + + # without context settings + clickhouse_node.query("ATTACH DATABASE test_database") + assert ( + "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" + in clickhouse_node.query("SHOW CREATE DATABASE test_database FORMAT TSV") + ) + + clickhouse_node.query( + "CREATE DATABASE test_database_1 ENGINE = MySQL('mysql57:3306', test, 'root', 'clickhouse') SETTINGS mysql_datatypes_support_level = 'decimal,datetime64'", + settings={"mysql_datatypes_support_level": "decimal"}, + ) + + assert ( + "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" + in clickhouse_node.query("SHOW CREATE DATABASE test_database_1 FORMAT TSV") + ) + clickhouse_node.query("DETACH DATABASE test_database_1") + + # without context settings + clickhouse_node.query("ATTACH DATABASE test_database_1") + assert ( + "SETTINGS mysql_datatypes_support_level = \\'decimal,datetime64\\'" + in 
clickhouse_node.query("SHOW CREATE DATABASE test_database_1 FORMAT TSV") + ) + + clickhouse_node.query("DROP DATABASE test_database") + clickhouse_node.query("DROP DATABASE test_database_1") + assert "test_database" not in clickhouse_node.query("SHOW DATABASES") + mysql_node.query("DROP DATABASE test") + + +float_values = [0, "NULL"] +clickhouse_float_values = [0, "\\N"] +int32_values = [0, 1, -1, 2147483647, -2147483648] +uint32_values = [ + 0, + 1, +] # [FIXME] seems client have issue with value 4294967295, it returns -1 for it +mint_values = [0, 1, -1, 8388607, -8388608] +umint_values = [0, 1, 16777215] +int16_values = [0, 1, -1, 32767, -32768] +uint16_values = [0, 1, 65535] +int8_values = [0, 1, -1, 127, -128] +uint8_values = [0, 1, 255] +string_values = ["'ClickHouse'", "NULL"] +clickhouse_string_values = ["ClickHouse", "\\N"] +date_values = ["'1970-01-01'"] +date2Date32_values = ["'1925-01-01'", "'2283-11-11'"] +date2String_values = ["'1000-01-01'", "'9999-12-31'"] + + +decimal_values = [ + 0, + 0.123, + 0.4, + 5.67, + 8.91011, + 123456789.123, + -0.123, + -0.4, + -5.67, + -8.91011, + -123456789.123, +] +timestamp_values = ["'2015-05-18 07:40:01.123'", "'2019-09-16 19:20:11.123'"] +timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11'"] + + +def arryToString(expected_clickhouse_values): + return "\n".join(str(value) for value in expected_clickhouse_values) + + +# if expected_clickhouse_values is "", compare MySQL and ClickHouse query results directly +@pytest.mark.parametrize( + "case_name, mysql_type, expected_ch_type, mysql_values, expected_clickhouse_values, setting_mysql_datatypes_support_level", + [ + pytest.param( + "common_types", + "FLOAT", + "Nullable(Float32)", + float_values, + clickhouse_float_values, + "", + id="float_1", + ), + pytest.param( + "common_types", + "FLOAT UNSIGNED", + "Nullable(Float32)", + float_values, + clickhouse_float_values, + "", + id="float_2", + ), + pytest.param( + "common_types", + "INT", + "Nullable(Int32)", + int32_values, + int32_values, + "", + id="common_types_1", + ), + pytest.param( + "common_types", + "INT NOT NULL", + "Int32", + int32_values, + int32_values, + "", + id="common_types_2", + ), + pytest.param( + "common_types", + "INT UNSIGNED NOT NULL", + "UInt32", + uint32_values, + uint32_values, + "", + id="common_types_3", + ), + pytest.param( + "common_types", + "INT UNSIGNED", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_4", + ), + pytest.param( + "common_types", + "INT UNSIGNED DEFAULT NULL", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_5", + ), + pytest.param( + "common_types", + "INT UNSIGNED DEFAULT '1'", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_6", + ), + pytest.param( + "common_types", + "INT(10)", + "Nullable(Int32)", + int32_values, + int32_values, + "", + id="common_types_7", + ), + pytest.param( + "common_types", + "INT(10) NOT NULL", + "Int32", + int32_values, + int32_values, + "", + id="common_types_8", + ), + pytest.param( + "common_types", + "INT(10) UNSIGNED NOT NULL", + "UInt32", + uint32_values, + uint32_values, + "", + id="common_types_8", + ), + pytest.param( + "common_types", + "INT(10) UNSIGNED", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_9", + ), + pytest.param( + "common_types", + "INT(10) UNSIGNED DEFAULT NULL", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_10", + ), + pytest.param( + "common_types", + 
"INT(10) UNSIGNED DEFAULT '1'", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_11", + ), + pytest.param( + "common_types", + "INTEGER", + "Nullable(Int32)", + int32_values, + int32_values, + "", + id="common_types_12", + ), + pytest.param( + "common_types", + "INTEGER UNSIGNED", + "Nullable(UInt32)", + uint32_values, + uint32_values, + "", + id="common_types_13", + ), + pytest.param( + "common_types", + "MEDIUMINT", + "Nullable(Int32)", + mint_values, + mint_values, + "", + id="common_types_14", + ), + pytest.param( + "common_types", + "MEDIUMINT UNSIGNED", + "Nullable(UInt32)", + umint_values, + umint_values, + "", + id="common_types_15", + ), + pytest.param( + "common_types", + "SMALLINT", + "Nullable(Int16)", + int16_values, + int16_values, + "", + id="common_types_16", + ), + pytest.param( + "common_types", + "SMALLINT UNSIGNED", + "Nullable(UInt16)", + uint16_values, + uint16_values, + "", + id="common_types_17", + ), + pytest.param( + "common_types", + "TINYINT", + "Nullable(Int8)", + int8_values, + int8_values, + "", + id="common_types_18", + ), + pytest.param( + "common_types", + "TINYINT UNSIGNED", + "Nullable(UInt8)", + uint8_values, + uint8_values, + "", + id="common_types_19", + ), + pytest.param( + "common_types", + "VARCHAR(10)", + "Nullable(String)", + string_values, + clickhouse_string_values, + "", + id="common_types_20", + ), + pytest.param( + "common_types", + "DATE", + "Nullable(Date)", + date_values, + "", + "", + id="common_types_21", + ), + pytest.param( + "common_types", + "DATE", + "Nullable(Date32)", + date2Date32_values, + "", + "date2Date32", + id="common_types_22", + ), + pytest.param( + "common_types", + "DATE", + "Nullable(String)", + date2String_values, + "", + "date2String", + id="common_types_23", + ), + pytest.param( + "common_types", + "binary(1)", + "Nullable(FixedString(1))", + [1], + [1], + "", + id="common_types_24", + ), + pytest.param( + "common_types", + "binary(0)", + "Nullable(FixedString(1))", + ["NULL"], + ["\\N"], + "", + id="common_types_25", + ), + pytest.param( + "decimal_default", + "decimal NOT NULL", + "Decimal(10, 0)", + decimal_values, + "", + "decimal,datetime64", + id="decimal_1", + ), + pytest.param( + "decimal_default_nullable", + "decimal", + "Nullable(Decimal(10, 0))", + decimal_values, + "", + "decimal,datetime64", + id="decimal_2", + ), + pytest.param( + "decimal_18_6", + "decimal(18, 6) NOT NULL", + "Decimal(18, 6)", + decimal_values, + "", + "decimal,datetime64", + id="decimal_3", + ), + pytest.param( + "decimal_38_6", + "decimal(38, 6) NOT NULL", + "Decimal(38, 6)", + decimal_values, + "", + "decimal,datetime64", + id="decimal_4", + ), + # Due to python DB driver roundtrip MySQL timestamp and datetime values + # are printed with 6 digits after decimal point, so to simplify tests a bit, + # we only validate precision of 0 and 6. 
+ pytest.param( + "timestamp_default", + "timestamp", + "DateTime", + timestamp_values, + "", + "decimal,datetime64", + id="timestamp_default", + ), + pytest.param( + "timestamp_6", + "timestamp(6)", + "DateTime64(6)", + timestamp_values, + "", + "decimal,datetime64", + id="timestamp_6", + ), + pytest.param( + "datetime_default", + "DATETIME NOT NULL", + "DateTime64(0)", + timestamp_values, + "", + "decimal,datetime64", + id="datetime_default", + ), + pytest.param( + "datetime_6", + "DATETIME(6) NOT NULL", + "DateTime64(6)", + timestamp_values, + "", + "decimal,datetime64", + id="datetime_6_1", + ), + pytest.param( + "decimal_40_6", + "decimal(40, 6) NOT NULL", + "Decimal(40, 6)", + decimal_values, + "", + "decimal,datetime64", + id="decimal_40_6", + ), + pytest.param( + "decimal_18_6", + "decimal(18, 6) NOT NULL", + "String", + decimal_values, + "", + "datetime64", + id="decimal_18_6_1", + ), + pytest.param( + "decimal_18_6", + "decimal(18, 6) NOT NULL", + "String", + decimal_values, + "", + "", + id="decimal_18_6_2", + ), + pytest.param( + "datetime_6", + "DATETIME(6) NOT NULL", + "DateTime", + timestamp_values_no_subsecond, + "", + "decimal", + id="datetime_6_2", + ), + pytest.param( + "datetime_6", + "DATETIME(6) NOT NULL", + "DateTime", + timestamp_values_no_subsecond, + "", + "", + id="datetime_6_3", + ), + ], +) +def test_mysql_types( + started_cluster, + case_name, + mysql_type, + expected_ch_type, + mysql_values, + expected_clickhouse_values, + setting_mysql_datatypes_support_level, +): + """Verify that values written to MySQL can be read on ClickHouse side via DB engine MySQL, + or Table engine MySQL, or mysql() table function. + Make sure that type is converted properly and values match exactly. + """ + + substitutes = dict( + mysql_db="decimal_support", + table_name=case_name, + mysql_type=mysql_type, + mysql_values=", ".join("({})".format(x) for x in mysql_values), + ch_mysql_db="mysql_db", + ch_mysql_table="mysql_table_engine_" + case_name, + expected_ch_type=expected_ch_type, + ) + + clickhouse_query_settings = dict( + mysql_datatypes_support_level=setting_mysql_datatypes_support_level, + output_format_decimal_trailing_zeros=1, + ) + + def execute_query(node, query, **kwargs): + def do_execute(query): + query = Template(query).safe_substitute(substitutes) + res = node.query(query, **kwargs) + return res if isinstance(res, int) else res.rstrip("\n\r") + + if isinstance(query, (str, bytes)): + return do_execute(query) + else: + return [do_execute(q) for q in query] + + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + port=started_cluster.mysql57_port, + ) + ) as mysql_node: + execute_query( + mysql_node, + [ + "DROP DATABASE IF EXISTS ${mysql_db}", + "CREATE DATABASE ${mysql_db} DEFAULT CHARACTER SET 'utf8'", + "CREATE TABLE `${mysql_db}`.`${table_name}` (value ${mysql_type})", + "INSERT INTO `${mysql_db}`.`${table_name}` (value) VALUES ${mysql_values}", + "SELECT * FROM `${mysql_db}`.`${table_name}`", + "FLUSH TABLES", + ], + ) + + assert execute_query( + mysql_node, "SELECT COUNT(*) FROM ${mysql_db}.${table_name}" + ) == "{}".format(len(mysql_values)) + + # MySQL TABLE ENGINE + execute_query( + clickhouse_node, + [ + "DROP TABLE IF EXISTS ${ch_mysql_table};", + "CREATE TABLE ${ch_mysql_table} (value ${expected_ch_type}) ENGINE = MySQL('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", + ], + settings=clickhouse_query_settings, + ) + + # Validate type + assert ( + execute_query( + clickhouse_node, + 
"SELECT toTypeName(value) FROM ${ch_mysql_table} LIMIT 1", + settings=clickhouse_query_settings, + ) + == expected_ch_type + ) + + expected_format_clickhouse_values = arryToString(expected_clickhouse_values) + if expected_format_clickhouse_values == "": + expected_format_clickhouse_values = execute_query( + mysql_node, "SELECT value FROM ${mysql_db}.${table_name}" + ) + + # Validate values + assert expected_format_clickhouse_values == execute_query( + clickhouse_node, + "SELECT value FROM ${ch_mysql_table}", + settings=clickhouse_query_settings, + ) + + # MySQL DATABASE ENGINE + execute_query( + clickhouse_node, + [ + "DROP DATABASE IF EXISTS ${ch_mysql_db}", + "CREATE DATABASE ${ch_mysql_db} ENGINE = MySQL('mysql57:3306', '${mysql_db}', 'root', 'clickhouse')", + ], + settings=clickhouse_query_settings, + ) + + # Validate type + assert ( + execute_query( + clickhouse_node, + "SELECT toTypeName(value) FROM ${ch_mysql_db}.${table_name} LIMIT 1", + settings=clickhouse_query_settings, + ) + == expected_ch_type + ) + + # Validate values + assert expected_format_clickhouse_values == execute_query( + clickhouse_node, + "SELECT value FROM ${ch_mysql_db}.${table_name}", + settings=clickhouse_query_settings, + ) + + # MySQL TABLE FUNCTION + # Validate type + assert ( + execute_query( + clickhouse_node, + "SELECT toTypeName(value) FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse') LIMIT 1", + settings=clickhouse_query_settings, + ) + == expected_ch_type + ) + + # Validate values + assert expected_format_clickhouse_values == execute_query( + clickhouse_node, + "SELECT value FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", + settings=clickhouse_query_settings, + ) + + +def test_predefined_connection_configuration(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MySQL(mysql1)") + clickhouse_node.query( + "INSERT INTO `test_database`.`test_table` select number from numbers(100)" + ) + assert ( + clickhouse_node.query( + "SELECT count() FROM `test_database`.`test_table`" + ).rstrip() + == "100" + ) + + result = clickhouse_node.query("show create table test_database.test_table") + assert ( + result.strip() + == "CREATE TABLE test_database.test_table\\n(\\n `id` Int32\\n)\\nENGINE = MySQL(mysql1, table = \\'test_table\\')" + ) + + clickhouse_node.query("DROP DATABASE test_database") + clickhouse_node.query_and_get_error( + "CREATE DATABASE test_database ENGINE = MySQL(mysql2)" + ) + clickhouse_node.query_and_get_error( + "CREATE DATABASE test_database ENGINE = MySQL(unknown_collection)" + ) + clickhouse_node.query_and_get_error( + "CREATE DATABASE test_database ENGINE = MySQL(mysql1, 1)" + ) + + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL(mysql1, port=3306)" + ) + assert ( + clickhouse_node.query( + "SELECT count() FROM `test_database`.`test_table`" + ).rstrip() + == "100" + ) + + +def test_restart_server(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + 
started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_restart") + clickhouse_node.query("DROP DATABASE IF EXISTS test_restart") + clickhouse_node.query_and_get_error( + "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')" + ) + assert "test_restart" not in clickhouse_node.query("SHOW DATABASES") + + mysql_node.query("CREATE DATABASE test_restart DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_restart`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + clickhouse_node.query( + "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')" + ) + + assert "test_restart" in clickhouse_node.query("SHOW DATABASES") + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_restart") + + with PartitionManager() as pm: + pm.partition_instances( + clickhouse_node, mysql_node, action="REJECT --reject-with tcp-reset" + ) + clickhouse_node.restart_clickhouse() + clickhouse_node.query_and_get_error("SHOW TABLES FROM test_restart") + assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_restart") + + +def test_memory_leak(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + ) + clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`") + + clickhouse_node.query("DROP DATABASE test_database") + clickhouse_node.restart_clickhouse() + + +def test_password_leak(started_cluster): + with contextlib.closing( + MySQLNodeInstance( + "root", + "clickhouse", + started_cluster.mysql57_ip, + started_cluster.mysql57_port, + ) + ) as mysql_node: + mysql_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + mysql_node.query( + "CREATE TABLE `test_database`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" + ) + + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + ) + assert "clickhouse" not in clickhouse_node.query( + "SHOW CREATE test_database.test_table" + ) diff --git a/tests/integration/test_mysql_database_engine/configs/named_collections.xml b/tests/integration/test_mysql_database_engine/configs/named_collections.xml index 3b65536f20f..22be308b5a8 100644 --- a/tests/integration/test_mysql_database_engine/configs/named_collections.xml +++ b/tests/integration/test_mysql_database_engine/configs/named_collections.xml @@ -3,7 +3,7 @@ root clickhouse - mysql57 + mysql80 3306 test_database @@ -15,7 +15,7 @@ root clickhouse - mysql57 + mysql80 1111 clickhouse diff --git a/tests/integration/test_mysql_database_engine/test.py 
b/tests/integration/test_mysql_database_engine/test.py index 00b5eb9e8aa..64a38679121 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -13,7 +13,7 @@ clickhouse_node = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql8=True, stay_alive=True, ) @@ -70,7 +70,7 @@ class MySQLNodeInstance: def test_mysql_ddl_for_mysql_database(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -78,7 +78,7 @@ def test_mysql_ddl_for_mysql_database(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', 'test_database', 'root', 'clickhouse')" ) assert "test_database" in clickhouse_node.query("SHOW DATABASES") @@ -121,7 +121,7 @@ def test_mysql_ddl_for_mysql_database(started_cluster): def test_clickhouse_ddl_for_mysql_database(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -132,7 +132,7 @@ def test_clickhouse_ddl_for_mysql_database(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', 'test_database', 'root', 'clickhouse')" ) assert "test_table" in clickhouse_node.query("SHOW TABLES FROM test_database") @@ -158,7 +158,7 @@ def test_clickhouse_ddl_for_mysql_database(started_cluster): def test_clickhouse_dml_for_mysql_database(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -169,7 +169,7 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test_database, 'root', 'clickhouse')" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', test_database, 'root', 'clickhouse')" ) assert ( @@ -197,7 +197,7 @@ def test_clickhouse_dml_for_mysql_database(started_cluster): def test_clickhouse_join_for_mysql_database(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test") @@ -218,10 +218,10 @@ def test_clickhouse_join_for_mysql_database(started_cluster): clickhouse_node.query("DROP TABLE IF EXISTS default.t1_remote_mysql SYNC") clickhouse_node.query("DROP TABLE IF EXISTS 
default.t2_remote_mysql SYNC") clickhouse_node.query( - "CREATE TABLE default.t1_remote_mysql AS mysql('mysql57:3306','test','t1_mysql_local','root','clickhouse')" + "CREATE TABLE default.t1_remote_mysql AS mysql('mysql80:3306','test','t1_mysql_local','root','clickhouse')" ) clickhouse_node.query( - "CREATE TABLE default.t2_remote_mysql AS mysql('mysql57:3306','test','t2_mysql_local','root','clickhouse')" + "CREATE TABLE default.t2_remote_mysql AS mysql('mysql80:3306','test','t2_mysql_local','root','clickhouse')" ) clickhouse_node.query( "INSERT INTO `default`.`t1_remote_mysql` VALUES ('EN','A',''),('RU','B','AAA')" @@ -248,8 +248,8 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): MySQLNodeInstance( "root", "clickhouse", - started_cluster.mysql_ip, - port=started_cluster.mysql_port, + started_cluster.mysql8_ip, + port=started_cluster.mysql8_port, ) ) as mysql_node: with pytest.raises(QueryRuntimeException) as exception: @@ -257,7 +257,7 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): "CREATE DATABASE IF NOT EXISTS test_bad_arguments DEFAULT CHARACTER SET 'utf8'" ) clickhouse_node.query( - "CREATE DATABASE test_database_bad_arguments ENGINE = MySQL('mysql57:3306', test_bad_arguments, root, 'clickhouse')" + "CREATE DATABASE test_database_bad_arguments ENGINE = MySQL('mysql80:3306', test_bad_arguments, root, 'clickhouse')" ) assert "Database engine MySQL requested literal argument." in str( exception.value @@ -268,7 +268,7 @@ def test_bad_arguments_for_mysql_database_engine(started_cluster): def test_column_comments_for_mysql_database_engine(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -276,7 +276,7 @@ def test_column_comments_for_mysql_database_engine(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse')" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', 'test_database', 'root', 'clickhouse')" ) assert "test_database" in clickhouse_node.query("SHOW DATABASES") @@ -304,7 +304,7 @@ def test_column_comments_for_mysql_database_engine(started_cluster): def test_data_types_support_level_for_mysql_database_engine(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test") @@ -313,7 +313,7 @@ def test_data_types_support_level_for_mysql_database_engine(started_cluster): ) clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', test, 'root', 'clickhouse')", + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', test, 'root', 'clickhouse')", settings={"mysql_datatypes_support_level": "decimal,datetime64"}, ) @@ -331,7 +331,7 @@ def test_data_types_support_level_for_mysql_database_engine(started_cluster): ) clickhouse_node.query( - "CREATE DATABASE test_database_1 ENGINE = MySQL('mysql57:3306', test, 'root', 'clickhouse') SETTINGS mysql_datatypes_support_level = 'decimal,datetime64'", + "CREATE DATABASE test_database_1 ENGINE 
= MySQL('mysql80:3306', test, 'root', 'clickhouse') SETTINGS mysql_datatypes_support_level = 'decimal,datetime64'", settings={"mysql_datatypes_support_level": "decimal"}, ) @@ -693,7 +693,7 @@ def arryToString(expected_clickhouse_values): pytest.param( "timestamp_default", "timestamp", - "DateTime", + "Nullable(DateTime)", timestamp_values, "", "decimal,datetime64", @@ -702,7 +702,7 @@ def arryToString(expected_clickhouse_values): pytest.param( "timestamp_6", "timestamp(6)", - "DateTime64(6)", + "Nullable(DateTime64(6))", timestamp_values, "", "decimal,datetime64", @@ -817,8 +817,8 @@ def test_mysql_types( MySQLNodeInstance( "root", "clickhouse", - started_cluster.mysql_ip, - port=started_cluster.mysql_port, + started_cluster.mysql8_ip, + port=started_cluster.mysql8_port, ) ) as mysql_node: execute_query( @@ -842,7 +842,7 @@ def test_mysql_types( clickhouse_node, [ "DROP TABLE IF EXISTS ${ch_mysql_table};", - "CREATE TABLE ${ch_mysql_table} (value ${expected_ch_type}) ENGINE = MySQL('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", + "CREATE TABLE ${ch_mysql_table} (value ${expected_ch_type}) ENGINE = MySQL('mysql80:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", ], settings=clickhouse_query_settings, ) @@ -875,7 +875,7 @@ def test_mysql_types( clickhouse_node, [ "DROP DATABASE IF EXISTS ${ch_mysql_db}", - "CREATE DATABASE ${ch_mysql_db} ENGINE = MySQL('mysql57:3306', '${mysql_db}', 'root', 'clickhouse')", + "CREATE DATABASE ${ch_mysql_db} ENGINE = MySQL('mysql80:3306', '${mysql_db}', 'root', 'clickhouse')", ], settings=clickhouse_query_settings, ) @@ -902,7 +902,7 @@ def test_mysql_types( assert ( execute_query( clickhouse_node, - "SELECT toTypeName(value) FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse') LIMIT 1", + "SELECT toTypeName(value) FROM mysql('mysql80:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse') LIMIT 1", settings=clickhouse_query_settings, ) == expected_ch_type @@ -911,7 +911,7 @@ def test_mysql_types( # Validate values assert expected_format_clickhouse_values == execute_query( clickhouse_node, - "SELECT value FROM mysql('mysql57:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", + "SELECT value FROM mysql('mysql80:3306', '${mysql_db}', '${table_name}', 'root', 'clickhouse')", settings=clickhouse_query_settings, ) @@ -919,7 +919,7 @@ def test_mysql_types( def test_predefined_connection_configuration(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -971,13 +971,13 @@ def test_predefined_connection_configuration(started_cluster): def test_restart_server(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_restart") clickhouse_node.query("DROP DATABASE IF EXISTS test_restart") clickhouse_node.query_and_get_error( - "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')" + "CREATE DATABASE test_restart ENGINE = MySQL('mysql80:3306', 'test_restart', 'root', 'clickhouse')" ) assert "test_restart" not in clickhouse_node.query("SHOW DATABASES") @@ -986,7 +986,7 @@ def 
test_restart_server(started_cluster): "CREATE TABLE `test_restart`.`test_table` ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;" ) clickhouse_node.query( - "CREATE DATABASE test_restart ENGINE = MySQL('mysql57:3306', 'test_restart', 'root', 'clickhouse')" + "CREATE DATABASE test_restart ENGINE = MySQL('mysql80:3306', 'test_restart', 'root', 'clickhouse')" ) assert "test_restart" in clickhouse_node.query("SHOW DATABASES") @@ -1004,7 +1004,7 @@ def test_restart_server(started_cluster): def test_memory_leak(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -1015,7 +1015,7 @@ def test_memory_leak(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" ) clickhouse_node.query("SELECT count() FROM `test_database`.`test_table`") @@ -1026,7 +1026,7 @@ def test_memory_leak(started_cluster): def test_password_leak(started_cluster): with contextlib.closing( MySQLNodeInstance( - "root", "clickhouse", started_cluster.mysql_ip, started_cluster.mysql_port + "root", "clickhouse", started_cluster.mysql8_ip, started_cluster.mysql8_port ) ) as mysql_node: mysql_node.query("DROP DATABASE IF EXISTS test_database") @@ -1037,7 +1037,7 @@ def test_password_leak(started_cluster): clickhouse_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query( - "CREATE DATABASE test_database ENGINE = MySQL('mysql57:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" + "CREATE DATABASE test_database ENGINE = MySQL('mysql80:3306', 'test_database', 'root', 'clickhouse') SETTINGS connection_auto_close = 1" ) assert "clickhouse" not in clickhouse_node.query( "SHOW CREATE test_database.test_table" diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index e8b3ba3fcf3..06cbe70f7c6 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -14,7 +14,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", with_odbc_drivers=True, - with_mysql=True, + with_mysql8=True, with_postgres=True, main_configs=["configs/openssl.xml", "configs/odbc_logging.xml"], dictionaries=[ @@ -55,13 +55,13 @@ def get_mysql_conn(): conn = pymysql.connect( user="root", password="clickhouse", - host=cluster.mysql_ip, - port=cluster.mysql_port, + host=cluster.mysql8_ip, + port=cluster.mysql8_port, ) else: conn.ping(reconnect=True) logging.debug( - f"MySQL Connection establised: {cluster.mysql_ip}:{cluster.mysql_port}" + f"MySQL Connection establised: {cluster.mysql8_ip}:{cluster.mysql8_port}" ) return conn except Exception as e: @@ -230,7 +230,7 @@ def test_mysql_simple_select_works(started_cluster): node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nullable(UInt32)) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nullable(UInt32)) ENGINE = 
MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) diff --git a/tests/integration/test_storage_mysql/configs/named_collections.xml b/tests/integration/test_storage_mysql/configs/named_collections.xml index 04117f32d4b..d20630eaeb3 100644 --- a/tests/integration/test_storage_mysql/configs/named_collections.xml +++ b/tests/integration/test_storage_mysql/configs/named_collections.xml @@ -3,7 +3,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse test_table
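A minimal usage sketch of how such a named collection is typically consumed from the tests, assuming a clickhouse_node fixture like the one defined in tests/integration/test_storage_mysql/test.py and assuming the collection shown in the hunk above (user/password/host/port/database/table values) is registered under the name mysql1; the XML element names are not visible here, so both the collection name and the table name below are assumptions:

    # Hypothetical sketch: host mysql80, port 3306, credentials, database and table
    # all come from the named collection, so the engine clause only names it.
    clickhouse_node.query(
        "CREATE TABLE default.table_from_collection (id UInt32, name String) "
        "ENGINE = MySQL(mysql1)"
    )
    clickhouse_node.query("SELECT count() FROM default.table_from_collection")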
@@ -16,7 +16,7 @@ root clickhouse - mysql57 + mysql80 1111 clickhouse test_table
@@ -24,7 +24,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse test_table
@@ -33,7 +33,7 @@ root clickhouse - mysql57 + mysql80 3306 clickhouse 1 diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index e2257026dc7..9818a8183d7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -14,7 +14,7 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql8=True, ) node2 = cluster.add_instance( "node2", main_configs=["configs/remote_servers.xml"], with_mysql_cluster=True @@ -23,7 +23,7 @@ node3 = cluster.add_instance( "node3", main_configs=["configs/remote_servers.xml"], user_configs=["configs/users.xml"], - with_mysql=True, + with_mysql8=True, ) create_table_sql_template = """ @@ -43,7 +43,7 @@ drop_table_sql_template = """ def get_mysql_conn(started_cluster, host): conn = pymysql.connect( - user="root", password="clickhouse", host=host, port=started_cluster.mysql_port + user="root", password="clickhouse", host=host, port=started_cluster.mysql8_port ) return conn @@ -69,7 +69,7 @@ def started_cluster(): try: cluster.start() - conn = get_mysql_conn(cluster, cluster.mysql_ip) + conn = get_mysql_conn(cluster, cluster.mysql8_ip) create_mysql_db(conn, "clickhouse") ## create mysql db and table @@ -85,13 +85,13 @@ def test_many_connections(started_cluster): table_name = "test_many_connections" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -116,13 +116,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_insert_select(started_cluster): table_name = "test_insert_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -142,13 +142,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_replace_select(started_cluster): table_name = "test_replace_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); """.format( table_name, 
table_name ) @@ -173,13 +173,13 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_insert_on_duplicate_select(started_cluster): table_name = "test_insert_on_duplicate_select" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 0, 'update money = money + values(money)'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 0, 'update money = money + values(money)'); """.format( table_name, table_name ) @@ -205,12 +205,12 @@ def test_where(started_cluster): table_name = "test_where" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -264,11 +264,11 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL def test_table_function(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "table_function") create_mysql_table(conn, "table_function") table_function = ( - "mysql('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( + "mysql('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( "table_function" ) ) @@ -309,7 +309,7 @@ def test_table_function(started_cluster): def test_schema_inference(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "inference_table") with conn.cursor() as cursor: @@ -317,7 +317,7 @@ def test_schema_inference(started_cluster): "CREATE TABLE clickhouse.inference_table (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" ) - parameters = "'mysql57:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" + parameters = "'mysql80:3306', 'clickhouse', 'inference_table', 'root', 'clickhouse'" node1.query( f"CREATE TABLE mysql_schema_inference_engine ENGINE=MySQL({parameters})" @@ -335,7 +335,7 @@ def test_schema_inference(started_cluster): def test_binary_type(started_cluster): - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, "binary_type") with conn.cursor() as cursor: @@ -343,7 +343,7 @@ def test_binary_type(started_cluster): "CREATE TABLE clickhouse.binary_type (id INT PRIMARY KEY, data BINARY(16) NOT NULL)" ) table_function = ( - "mysql('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( + "mysql('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse')".format( "binary_type" ) ) @@ -363,12 +363,12 @@ def test_enum_type(started_cluster): table_name = "test_enum_type" node1.query(f"DROP TABLE IF EXISTS {table_name}") 
- conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) node1.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8('IP' = 1, 'URL' = 2)) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8('IP' = 1, 'URL' = 2)) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse', 1); """.format( table_name, table_name ) @@ -388,7 +388,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, source Enum8(' def test_mysql_distributed(started_cluster): table_name = "test_replicas" - conn1 = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn1 = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) conn2 = get_mysql_conn(started_cluster, started_cluster.mysql2_ip) conn3 = get_mysql_conn(started_cluster, started_cluster.mysql3_ip) conn4 = get_mysql_conn(started_cluster, started_cluster.mysql4_ip) @@ -422,7 +422,7 @@ def test_mysql_distributed(started_cluster): CREATE TABLE test_replica{} (id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql{}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse');""".format( - i, 57 if i == 1 else i + i, 80 if i == 1 else i ) ) nodes[i - 1].query( @@ -433,11 +433,11 @@ def test_mysql_distributed(started_cluster): # test multiple ports parsing result = node2.query( - """SELECT DISTINCT(name) FROM mysql('mysql{57|2|3}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + """SELECT DISTINCT(name) FROM mysql('mysql{80|2|3}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) assert result == "host1\n" or result == "host2\n" or result == "host3\n" result = node2.query( - """SELECT DISTINCT(name) FROM mysql('mysql57:3306|mysql2:3306|mysql3:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + """SELECT DISTINCT(name) FROM mysql('mysql80:3306|mysql2:3306|mysql3:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) assert result == "host1\n" or result == "host2\n" or result == "host3\n" @@ -457,7 +457,7 @@ def test_mysql_distributed(started_cluster): """ CREATE TABLE test_shards (id UInt32, name String, age UInt32, money UInt32) - ENGINE = ExternalDistributed('MySQL', 'mysql{57|2}:3306,mysql{3|4}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ + ENGINE = ExternalDistributed('MySQL', 'mysql{80|2}:3306,mysql{3|4}:3306', 'clickhouse', 'test_replicas', 'root', 'clickhouse'); """ ) # Check only one replica in each shard is used @@ -472,24 +472,24 @@ def test_mysql_distributed(started_cluster): result = node2.query(query) assert result == "host1\nhost2\nhost3\nhost4\n" - # disconnect mysql57 - started_cluster.pause_container("mysql57") + # disconnect mysql + started_cluster.pause_container("mysql80") result = node2.query("SELECT DISTINCT(name) FROM test_shards ORDER BY name") - started_cluster.unpause_container("mysql57") + started_cluster.unpause_container("mysql80") assert result == "host2\nhost4\n" or result == "host3\nhost4\n" def test_external_settings(started_cluster): table_name = "test_external_settings" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) 
node3.query(f"DROP TABLE IF EXISTS {table_name}") node3.query( """ -CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse'); +CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse'); """.format( table_name, table_name ) @@ -521,7 +521,7 @@ def test_settings_connection_wait_timeout(started_cluster): node1.query(f"DROP TABLE IF EXISTS {table_name}") wait_timeout = 2 - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -534,7 +534,7 @@ def test_settings_connection_wait_timeout(started_cluster): age UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') SETTINGS connection_wait_timeout={}, connection_pool_size=1 """.format( table_name, table_name, wait_timeout @@ -584,7 +584,7 @@ def test_settings_connection_wait_timeout(started_cluster): def test_predefined_connection_configuration(started_cluster): - conn = get_mysql_conn(started_cluster, started_cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, started_cluster.mysql8_ip) table_name = "test_table" drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -671,7 +671,7 @@ def test_mysql_in(started_cluster): table_name = "test_mysql_in" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -684,7 +684,7 @@ def test_mysql_in(started_cluster): age UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') """.format( table_name, table_name ) @@ -714,7 +714,7 @@ def test_mysql_null(started_cluster): table_name = "test_mysql_in" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) with conn.cursor() as cursor: cursor.execute( @@ -735,7 +735,7 @@ def test_mysql_null(started_cluster): id UInt32, money Nullable(UInt32) ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{}', 'root', 'clickhouse') """.format( table_name, table_name ) @@ -780,7 +780,7 @@ def test_settings(started_cluster): connect_timeout = 10123002 connection_pool_size = 1 - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) create_mysql_table(conn, table_name) @@ -793,7 +793,7 @@ def test_settings(started_cluster): age UInt32, money UInt32 ) - ENGINE = MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse') + ENGINE = MySQL('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse') SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, read_write_timeout={rw_timeout}, connection_pool_size={connection_pool_size} """ ) @@ -815,7 +815,7 @@ def test_settings(started_cluster): node1.query( f""" SELECT * - FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 
'clickhouse', + FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse', SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, @@ -843,7 +843,7 @@ def test_settings(started_cluster): connect_timeout = 50123002 node1.query( f""" - CREATE DATABASE mm ENGINE = MySQL('mysql57:3306', 'clickhouse', 'root', 'clickhouse') + CREATE DATABASE mm ENGINE = MySQL('mysql80:3306', 'clickhouse', 'root', 'clickhouse') SETTINGS connection_wait_timeout={wait_timeout}, connect_timeout={connect_timeout}, @@ -863,7 +863,7 @@ def test_mysql_point(started_cluster): table_name = "test_mysql_point" node1.query(f"DROP TABLE IF EXISTS {table_name}") - conn = get_mysql_conn(started_cluster, cluster.mysql_ip) + conn = get_mysql_conn(started_cluster, cluster.mysql8_ip) drop_mysql_table(conn, table_name) with conn.cursor() as cursor: cursor.execute( @@ -882,25 +882,25 @@ def test_mysql_point(started_cluster): conn.commit() result = node1.query( - f"DESCRIBE mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"DESCRIBE mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) assert result.strip() == "id\tInt32\t\t\t\t\t\npoint\tPoint" assert 1 == int( node1.query( - f"SELECT count() FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"SELECT count() FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) ) assert ( "(15,20)" == node1.query( - f"SELECT point FROM mysql('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"SELECT point FROM mysql('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ).strip() ) node1.query("DROP TABLE IF EXISTS test") node1.query( - f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql57:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" + f"CREATE TABLE test (id Int32, point Point) Engine=MySQL('mysql80:3306', 'clickhouse', '{table_name}', 'root', 'clickhouse')" ) assert "(15,20)" == node1.query(f"SELECT point FROM test").strip() From ae91c655a003ee73ef6e983c4d28d492f4a8ecbc Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 21 Feb 2024 14:36:58 +0300 Subject: [PATCH 130/145] Tables system.backups and system.backup_log add query_id and error stacktrace --- src/Backups/BackupOperationInfo.h | 3 +++ src/Backups/BackupsWorker.cpp | 22 ++++++++++++++++---- src/Backups/BackupsWorker.h | 3 ++- src/Interpreters/BackupLog.cpp | 2 ++ src/Storages/System/StorageSystemBackups.cpp | 3 +++ 5 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/Backups/BackupOperationInfo.h b/src/Backups/BackupOperationInfo.h index e57b57d75f1..21b5284458c 100644 --- a/src/Backups/BackupOperationInfo.h +++ b/src/Backups/BackupOperationInfo.h @@ -20,6 +20,9 @@ struct BackupOperationInfo /// Base Backup Operation name, a string like "Disk('backups', 'my_base_backup')" String base_backup_name; + /// Query ID of a query that started backup + String query_id; + /// This operation is internal and should not be shown in system.backups bool internal = false; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index c19be22c749..5905d723800 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -440,7 +440,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context try { - addInfo(backup_id, backup_name_for_logging, base_backup_name, backup_settings.internal, context->getProcessListElement(), BackupStatus::CREATING_BACKUP); + 
addInfo(backup_id, + backup_name_for_logging, + base_backup_name, + context->getCurrentQueryId(), + backup_settings.internal, + context->getProcessListElement(), + BackupStatus::CREATING_BACKUP); /// Prepare context to use. ContextPtr context_in_use = context; @@ -823,7 +829,13 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt if (restore_settings.base_backup_info) base_backup_name = restore_settings.base_backup_info->toStringForLogging(); - addInfo(restore_id, backup_name_for_logging, base_backup_name, restore_settings.internal, context->getProcessListElement(), BackupStatus::RESTORING); + addInfo(restore_id, + backup_name_for_logging, + base_backup_name, + context->getCurrentQueryId(), + restore_settings.internal, + context->getProcessListElement(), + BackupStatus::RESTORING); /// Prepare context to use. ContextMutablePtr context_in_use = context; @@ -1108,13 +1120,15 @@ void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr } -void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, bool internal, QueryStatusPtr process_list_element, BackupStatus status) +void BackupsWorker::addInfo(const OperationID & id, const String & name, const String & base_backup_name, const String & query_id, + bool internal, QueryStatusPtr process_list_element, BackupStatus status) { ExtendedOperationInfo extended_info; auto & info = extended_info.info; info.id = id; info.name = name; info.base_backup_name = base_backup_name; + info.query_id = query_id; info.internal = internal; info.status = status; info.start_time = std::chrono::system_clock::now(); @@ -1183,7 +1197,7 @@ void BackupsWorker::setStatus(const String & id, BackupStatus status, bool throw if (isFailedOrCancelled(status)) { - info.error_message = getCurrentExceptionMessage(false); + info.error_message = getCurrentExceptionMessage(true /*with_stacktrace*/); info.exception = std::current_exception(); } diff --git a/src/Backups/BackupsWorker.h b/src/Backups/BackupsWorker.h index 73c8bf19473..ad187552c31 100644 --- a/src/Backups/BackupsWorker.h +++ b/src/Backups/BackupsWorker.h @@ -108,7 +108,8 @@ private: /// Run data restoring tasks which insert data to tables. 
void restoreTablesData(const BackupOperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool, QueryStatusPtr process_list_element); - void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, bool internal, QueryStatusPtr process_list_element, BackupStatus status); + void addInfo(const BackupOperationID & id, const String & name, const String & base_backup_name, const String & query_id, + bool internal, QueryStatusPtr process_list_element, BackupStatus status); void setStatus(const BackupOperationID & id, BackupStatus status, bool throw_if_error = true); void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); } void setNumFilesAndSize(const BackupOperationID & id, size_t num_files, UInt64 total_size, size_t num_entries, diff --git a/src/Interpreters/BackupLog.cpp b/src/Interpreters/BackupLog.cpp index d34e982ffc3..d5b69bc0728 100644 --- a/src/Interpreters/BackupLog.cpp +++ b/src/Interpreters/BackupLog.cpp @@ -28,6 +28,7 @@ ColumnsDescription BackupLogElement::getColumnsDescription() {"id", std::make_shared()}, {"name", std::make_shared()}, {"base_backup_name", std::make_shared()}, + {"query_id", std::make_shared()}, {"status", std::make_shared(getBackupStatusEnumValues())}, {"error", std::make_shared()}, {"start_time", std::make_shared()}, @@ -51,6 +52,7 @@ void BackupLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(info.id); columns[i++]->insert(info.name); columns[i++]->insert(info.base_backup_name); + columns[i++]->insert(info.query_id); columns[i++]->insert(static_cast(info.status)); columns[i++]->insert(info.error_message); columns[i++]->insert(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 17fb56e0a92..0063d9e308f 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -22,6 +22,7 @@ ColumnsDescription StorageSystemBackups::getColumnsDescription() {"id", std::make_shared(), "Operation ID, can be either passed via SETTINGS id=... 
or be randomly generated UUID."}, {"name", std::make_shared(), "Operation name, a string like `Disk('backups', 'my_backup')`"}, {"base_backup_name", std::make_shared(), "Base Backup Operation name, a string like `Disk('backups', 'my_base_backup')`"}, + {"query_id", std::make_shared(), "Query ID of a query that started backup."}, {"status", std::make_shared(getBackupStatusEnumValues()), "Status of backup or restore operation."}, {"error", std::make_shared(), "The error message if any."}, {"start_time", std::make_shared(), "The time when operation started."}, @@ -44,6 +45,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con auto & column_id = assert_cast(*res_columns[column_index++]); auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_base_backup_name = assert_cast(*res_columns[column_index++]); + auto & column_query_id = assert_cast(*res_columns[column_index++]); auto & column_status = assert_cast(*res_columns[column_index++]); auto & column_error = assert_cast(*res_columns[column_index++]); auto & column_start_time = assert_cast(*res_columns[column_index++]); @@ -62,6 +64,7 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_id.insertData(info.id.data(), info.id.size()); column_name.insertData(info.name.data(), info.name.size()); column_base_backup_name.insertData(info.base_backup_name.data(), info.base_backup_name.size()); + column_query_id.insertData(info.query_id.data(), info.query_id.size()); column_status.insertValue(static_cast(info.status)); column_error.insertData(info.error_message.data(), info.error_message.size()); column_start_time.insertValue(static_cast(std::chrono::system_clock::to_time_t(info.start_time))); From 8f0f8bf29433dd27d352fa6c15defdcdcf8dc4f3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Feb 2024 11:54:20 +0000 Subject: [PATCH 131/145] Fix typo --- src/Functions/array/arrayDotProduct.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 12b2ce428ee..6c615a058c3 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -130,7 +130,7 @@ struct DotProduct /// The implementation is modeled after the implementation of distance functions arrayL1Distance, arrayL2Distance, etc. -/// The main difference is that arrayDotProduct() interfers the result type differently. +/// The main difference is that arrayDotProduct() interferes the result type differently. 
template class FunctionArrayScalarProduct : public IFunction { From 1ac94813ed49d08ba1c007a7e182b618583d6a59 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 21 Feb 2024 14:14:45 +0000 Subject: [PATCH 132/145] review fixes --- src/Common/Scheduler/Nodes/FairPolicy.h | 1 + src/Common/Scheduler/Nodes/FifoQueue.h | 10 +++++----- src/Common/Scheduler/Nodes/PriorityPolicy.h | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index 53740e7a543..ce2bf729a04 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,6 +134,7 @@ public: std::pair dequeueRequest() override { + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` while (true) { if (heap_size == 0) diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 2adb7241314..45ed32343ff 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -39,7 +39,7 @@ public: void enqueueRequest(ResourceRequest * request) override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -49,7 +49,7 @@ public: std::pair dequeueRequest() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (requests.empty()) return {nullptr, false}; ResourceRequest * result = requests.front(); @@ -64,7 +64,7 @@ public: bool cancelRequest(ResourceRequest * request) override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) for (auto i = requests.begin(), e = requests.end(); i != e; ++i) { @@ -84,7 +84,7 @@ public: bool isActive() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return !requests.empty(); } @@ -117,7 +117,7 @@ public: std::pair getQueueLengthAndCost() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return {requests.size(), queue_cost}; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index fd02ea3df62..9b4cfc37f8c 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,6 +102,7 @@ public: std::pair dequeueRequest() override { + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` while (true) { if (items.empty()) From 5ba371662f0cca9e87eb0eab82fd9a4d916494a8 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 21 Feb 2024 18:10:27 +0300 Subject: [PATCH 133/145] Backups delete suspicious file --- src/Backups/.BackupCoordinationLocal.cpp.pHKoqj | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/Backups/.BackupCoordinationLocal.cpp.pHKoqj diff --git a/src/Backups/.BackupCoordinationLocal.cpp.pHKoqj b/src/Backups/.BackupCoordinationLocal.cpp.pHKoqj deleted file mode 100644 index e69de29bb2d..00000000000 From 47ab81bbabc7a5c9773ba4f54e698e5a6e200bfd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Feb 2024 14:55:48 +0000 Subject: [PATCH 134/145] CI: support for random job pick #no_merge_commit #no_ci_cache --- tests/ci/ci.py | 37 ++++++++++++++++++++++++++++++++----- tests/ci/ci_config.py | 22 ++++++++++++---------- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git 
a/tests/ci/ci.py b/tests/ci/ci.py index 4d2b124a32c..320a0ef42d5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -6,12 +6,13 @@ from enum import Enum import json import logging import os +import random import re import subprocess import sys import time from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import Any, Dict, List, Optional, Sequence, Set, Union import docker_images_helper import upload_result_helper @@ -1107,6 +1108,7 @@ def _configure_jobs( ci_cache.print_status() jobs_to_wait: Dict[str, Dict[str, Any]] = {} + randomization_buckets = {} # type: Dict[str, Set[str]] for job in digests: digest = digests[job] @@ -1115,11 +1117,18 @@ def _configure_jobs( batches_to_do: List[int] = [] add_to_skip = False + if job_config.pr_only and pr_info.is_release_branch(): + continue + if job_config.release_only and not pr_info.is_release_branch(): + continue + + # fill job randomization buckets (for jobs with configured @random_bucket property)) + if job_config.random_bucket: + if not job_config.random_bucket in randomization_buckets: + randomization_buckets[job_config.random_bucket] = set() + randomization_buckets[job_config.random_bucket].add(job) + for batch in range(num_batches): # type: ignore - if job_config.pr_only and pr_info.is_release_branch(): - continue - if job_config.release_only and not pr_info.is_release_branch(): - continue if job_config.run_by_label: # this job controlled by label, add to todo if its label is set in pr if job_config.run_by_label in pr_info.labels: @@ -1167,6 +1176,24 @@ def _configure_jobs( "num_batches": num_batches, } + if not pr_info.is_release_branch(): + # randomization bucket filtering (pick one random job from each bucket, for jobs with configured random_bucket property) + for _, jobs in randomization_buckets.items(): + jobs_to_remove_randomization = set() + bucket_ = list(jobs) + random.shuffle(bucket_) + while len(bucket_) > 1: + random_job = bucket_.pop() + if random_job in jobs_to_do: + jobs_to_remove_randomization.add(random_job) + if jobs_to_remove_randomization: + print( + f"Following jobs will be removed due to randomization bucket: [{jobs_to_remove_randomization}]" + ) + jobs_to_do = [ + job for job in jobs_to_do if job not in jobs_to_remove_randomization + ] + ## c. check CI controlling labels and commit messages if pr_info.labels: jobs_requested_by_label = [] # type: List[str] diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 3ebcbb7ed59..ccae8dd1383 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -198,6 +198,8 @@ class JobConfig: pr_only: bool = False # job is for release/master branches only release_only: bool = False + # to randomly pick and run one job among jobs in the same @random_bucket. Applied in PR branches only. 
+ random_bucket: str = "" @dataclass @@ -993,29 +995,29 @@ CI_CONFIG = CIConfig( Build.PACKAGE_TSAN, job_config=JobConfig(num_batches=5, **statless_test_common_params), # type: ignore ), - JobNames.STRESS_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stress_test_common_params) # type: ignore + JobNames.STRESS_TEST_DEBUG: TestConfig( + Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_TSAN: TestConfig( Build.PACKAGE_TSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore ), + JobNames.STRESS_TEST_ASAN: TestConfig( + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore + ), JobNames.STRESS_TEST_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.STRESS_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stress_test_common_params) # type: ignore - ), - JobNames.STRESS_TEST_DEBUG: TestConfig( - Build.PACKAGE_DEBUG, job_config=JobConfig(**stress_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="stress_with_sanitizer", **stress_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(pr_only=True, random_bucket="upgrade_with_sanitizer", **upgrade_test_common_params) # type: ignore ), JobNames.UPGRADE_TEST_DEBUG: TestConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(pr_only=True, **upgrade_test_common_params) # type: ignore From c05c3944d11071201b216271679c0accf7db64aa Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 21 Feb 2024 16:49:53 +0100 Subject: [PATCH 135/145] Hide sensitive info for s3queue --- src/Parsers/ASTFunction.cpp | 2 +- tests/integration/test_mask_sensitive_info/test.py | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index ae9b8ddbe85..ba4c7db96e6 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -380,7 +380,7 @@ namespace findMySQLFunctionSecretArguments(); } else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS") || - (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg")) + (engine_name == "DeltaLake") || (engine_name == "Hudi") || (engine_name == "Iceberg") || (engine_name == "S3Queue")) { /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) 
findS3TableEngineSecretArguments(); diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index ec34c181371..d57960c629a 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -195,6 +195,10 @@ def test_create_table(): f"DeltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", "DNS_ERROR", ), + f"S3Queue('http://minio1:9001/root/data/', 'CSV')", + f"S3Queue('http://minio1:9001/root/data/', 'CSV', 'gzip')", + f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV')", + f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV', 'gzip')", ] def make_test_case(i): @@ -254,6 +258,10 @@ def test_create_table(): "CREATE TABLE table14 (x int) ENGINE = S3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV', 'gzip')", "CREATE TABLE table15 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '[HIDDEN]')", "CREATE TABLE table16 (`x` int) ENGINE = DeltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", + "CREATE TABLE table17 (x int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'CSV')", + "CREATE TABLE table18 (x int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'CSV', 'gzip')", + "CREATE TABLE table19 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV')", + "CREATE TABLE table20 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV', 'gzip')", ], must_not_contain=[password], ) From f8274692073b90d9cf14659f3f6300e5083adba7 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Feb 2024 16:08:03 +0000 Subject: [PATCH 136/145] CI: random sanitizer for parallel repl in PR wf --- tests/ci/ci_config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index ccae8dd1383..d78005bcc19 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -934,16 +934,16 @@ CI_CONFIG = CIConfig( Build.PACKAGE_DEBUG, job_config=JobConfig(**stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_ASAN: TestConfig( - Build.PACKAGE_ASAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_ASAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_MSAN: TestConfig( - Build.PACKAGE_MSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_MSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_UBSAN: TestConfig( - Build.PACKAGE_UBSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_UBSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: TestConfig( - Build.PACKAGE_TSAN, job_config=JobConfig(**stateful_test_common_params) # type: ignore + Build.PACKAGE_TSAN, job_config=JobConfig(random_bucket="parrepl_with_sanitizer", **stateful_test_common_params) # type: ignore ), # End stateful tests for parallel replicas JobNames.STATELESS_TEST_ASAN: TestConfig( From 5ea3afb06ee38a6bebbeb25ffe3281741ad67fe2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Feb 2024 16:29:47 +0000 Subject: [PATCH 137/145] Docs: Correct variable 
names for simpleLinearRegression Fixes: #59729 --- .../aggregate-functions/reference/simplelinearregression.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index bcff05ada47..ea3dbff8691 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -13,8 +13,8 @@ simpleLinearRegression(x, y) Parameters: -- `x` — Column with dependent variable values. -- `y` — Column with explanatory variable values. +- `x` — Column with explanatory variable values. +- `y` — Column with dependent variable values. Returned values: From da50758eb60d353e4055bbd8a3378b9dd63b1fd0 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Feb 2024 16:59:41 +0000 Subject: [PATCH 138/145] CI: use aarch style checker for CI config job #do_not_test --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/master.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release_branches.yml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 6b05f1fe9f4..51670087ffe 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -11,7 +11,7 @@ on: # yamllint disable-line rule:truthy - 'backport/**' jobs: RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 24daca44da6..7cb5455ed73 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -11,7 +11,7 @@ on: # yamllint disable-line rule:truthy - 'master' jobs: RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 770e1ec3789..93ac2be19b4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,7 +14,7 @@ jobs: # The task for having a preserved ENV and event.json for later investigation uses: ./.github/workflows/debug.yml RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c9cf5ab90dd..1afcdab938b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -18,7 +18,7 @@ on: # yamllint disable-line rule:truthy ########################################################################################## jobs: RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index c076c2209ec..57e90d79ebd 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -14,7 +14,7 @@ on: # yamllint disable-line rule:truthy jobs: RunConfig: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, style-checker-aarch64] 
outputs: data: ${{ steps.runconfig.outputs.CI_DATA }} steps: From 07c9deed41a835e37a0a3098141576a83fe935a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 21 Feb 2024 18:40:00 +0100 Subject: [PATCH 139/145] Make cloud sync required --- tests/ci/ci_config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d78005bcc19..0418e71aaf5 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -1139,6 +1139,7 @@ CI_CONFIG.validate() # checks required by Mergeable Check REQUIRED_CHECKS = [ "PR Check", + "A Sync", # Cloud sync JobNames.BUILD_CHECK, JobNames.BUILD_CHECK_SPECIAL, JobNames.DOCS_CHECK, From 5fc28c536cefac8aa0e58f3c2f5b78f2650a2d8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 21 Feb 2024 19:18:47 +0100 Subject: [PATCH 140/145] Generate a conflict --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 433195af9c3..dcfe145b04c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -640,7 +640,7 @@ class IColumn; M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \ M(Bool, describe_include_virtual_columns, false, "If true, virtual columns of table will be included into result of DESCRIBE query", 0) \ M(Bool, describe_compact_output, false, "If true, include only column names and types into result of DESCRIBE query", 0) \ - M(Bool, apply_mutations_on_fly, false, "Only available in ClickHouse Cloud", 0) \ + M(Bool, apply_mutations_on_fly, false, "Generate a conflict", 0) \ M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true nondeterministic function are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ From 453d4d30cf3b788be2bd7023cfcc6c91455f3409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 21 Feb 2024 20:19:00 +0100 Subject: [PATCH 141/145] Revert "Support resource request canceling" --- docs/en/operations/system-tables/scheduler.md | 4 - src/Common/Scheduler/ISchedulerNode.h | 2 - src/Common/Scheduler/ISchedulerQueue.h | 6 - src/Common/Scheduler/Nodes/FairPolicy.h | 105 ++++++++---------- src/Common/Scheduler/Nodes/FifoQueue.h | 31 +----- src/Common/Scheduler/Nodes/PriorityPolicy.h | 38 +++---- .../tests/gtest_dynamic_resource_manager.cpp | 1 + .../Nodes/tests/gtest_resource_scheduler.cpp | 63 ----------- src/Common/Scheduler/ResourceGuard.h | 9 +- src/Common/Scheduler/ResourceRequest.cpp | 13 --- src/Common/Scheduler/ResourceRequest.h | 30 +++-- src/Common/Scheduler/SchedulerRoot.h | 32 +++--- .../System/StorageSystemScheduler.cpp | 4 - 13 files changed, 111 insertions(+), 227 deletions(-) delete mode 100644 src/Common/Scheduler/ResourceRequest.cpp diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index c4de7f76fdc..953db4c28f2 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,9 +26,7 @@ priority: 0 is_active: 0 active_children: 0 dequeued_requests: 67 
-canceled_requests: 0 dequeued_cost: 4692272 -canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -56,9 +54,7 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. -- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. -- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 20c1f4332da..804026d7bf4 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,9 +387,7 @@ public: /// Introspection std::atomic dequeued_requests{0}; - std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; - std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index 532f4bf6c63..cbe63bd304a 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,12 +50,6 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; - /// Cancel previously enqueued request. - /// Returns `false` and does nothing given unknown or already executed request. - /// Returns `true` if requests has been found and canceled. - /// Should be called outside of scheduling subsystem, implementation must be thread-safe. 
- virtual bool cancelRequest(ResourceRequest * request) = 0; - /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index ce2bf729a04..c0e187e6fa9 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,65 +134,56 @@ public: std::pair dequeueRequest() override { - // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` - while (true) + if (heap_size == 0) + return {nullptr, false}; + + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + assert(request != nullptr); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; + + // SFQ fairness invariant: system vruntime equals last served request start-time + assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; + + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + + if (child_active) // Put active child back in heap after vruntime update { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - if (request) - { - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - } - - if (child_active) // Put active child back in heap after vruntime update - { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; - - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } - - // Reset any difference between children on busy period end - if (heap_size == 0) - { - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 1000000000 < ns) - { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } - - if (request) - { - dequeued_requests++; - dequeued_cost += request->cost; - return {request, heap_size > 0}; - } + std::push_heap(items.begin(), items.begin() + heap_size); } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = 
clock_gettime_ns(); + if (last_reset_ns + 1000000000 < ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 45ed32343ff..38ae902bc2f 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -39,7 +39,8 @@ public: void enqueueRequest(ResourceRequest * request) override { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); + request->enqueue_ns = clock_gettime_ns(); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -49,7 +50,7 @@ public: std::pair dequeueRequest() override { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); if (requests.empty()) return {nullptr, false}; ResourceRequest * result = requests.front(); @@ -62,29 +63,9 @@ public: return {result, !requests.empty()}; } - bool cancelRequest(ResourceRequest * request) override - { - std::lock_guard lock(mutex); - // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) - for (auto i = requests.begin(), e = requests.end(); i != e; ++i) - { - if (*i == request) - { - requests.erase(i); - if (requests.empty()) - busy_periods++; - queue_cost -= request->cost; - canceled_requests++; - canceled_cost += request->cost; - return true; - } - } - return false; - } - bool isActive() override { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); return !requests.empty(); } @@ -117,14 +98,14 @@ public: std::pair getQueueLengthAndCost() { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); return {requests.size(), queue_cost}; } private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel + std::deque requests; }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 9b4cfc37f8c..6d6b15bd063 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,31 +102,25 @@ public: std::pair dequeueRequest() override { - // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` - while (true) + if (items.empty()) + return {nullptr, false}; + + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + assert(request != nullptr); + + // Deactivate child if it is empty + if (!child_active) { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); if (items.empty()) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - - // Deactivate child if it is empty - if (!child_active) - { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); - if (items.empty()) - busy_periods++; - } - - if (request) - { - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; - } + busy_periods++; } + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp 
b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index cdf09776077..961a3b6f713 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -38,6 +38,7 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); + gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index e76639a4b01..9fefbc02cbd 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,7 +4,6 @@ #include -#include #include using namespace DB; @@ -74,22 +73,6 @@ struct ResourceHolder } }; -struct MyRequest : public ResourceRequest -{ - std::function on_execute; - - explicit MyRequest(ResourceCost cost_, std::function on_execute_) - : ResourceRequest(cost_) - , on_execute(on_execute_) - {} - - void execute() override - { - if (on_execute) - on_execute(); - } -}; - TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -128,49 +111,3 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } - -TEST(SchedulerRoot, Cancel) -{ - ResourceTest t; - - ResourceHolder r1(t); - auto * fc1 = r1.add("/", "1"); - r1.add("/prio"); - auto a = r1.addQueue("/prio/A", "1"); - auto b = r1.addQueue("/prio/B", "2"); - r1.registerResource(); - - std::barrier sync(2); - std::thread consumer1([&] - { - std::barrier destruct_sync(2); - MyRequest request(1,[&] - { - sync.arrive_and_wait(); // (A) - EXPECT_TRUE(fc1->requests.contains(&request)); - sync.arrive_and_wait(); // (B) - request.finish(); - destruct_sync.arrive_and_wait(); // (C) - }); - a.queue->enqueueRequest(&request); - destruct_sync.arrive_and_wait(); // (C) - }); - - std::thread consumer2([&] - { - MyRequest request(1,[&] - { - FAIL() << "This request must be canceled, but instead executes"; - }); - sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately - b.queue->enqueueRequest(&request); - bool canceled = b.queue->cancelRequest(&request); - EXPECT_TRUE(canceled); - sync.arrive_and_wait(); // (B) release request of consumer1 to be finished - }); - - consumer1.join(); - consumer2.join(); - - EXPECT_TRUE(fc1->requests.empty()); -} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index 50f665a384b..dca4041b176 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,7 +71,8 @@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; - ResourceRequest::finish(); + if (constraint) + constraint->finishRequest(this); } static Request & local() @@ -125,6 +126,12 @@ public: } } + /// Mark request as unsuccessful; by default request is considered to be successful + void setFailure() + { + request.successful = false; + } + ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp deleted file mode 100644 index 26e8084cdfa..00000000000 --- a/src/Common/Scheduler/ResourceRequest.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -namespace DB -{ - -void ResourceRequest::finish() -{ - 
if (constraint) - constraint->finishRequest(this); -} - -} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index f3153ad382c..3d2230746f9 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,6 +14,9 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); +/// Timestamps (nanoseconds since epoch) +using ResourceNs = UInt64; + /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -28,7 +31,7 @@ constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) ResourceRequest::finish() is called when consumption is finished. + * 6) request->constraint->finishRequest() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -36,10 +39,7 @@ constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). - * Returning false means it is too late for request to be canceled. It should be processed in a regular way. - * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen - * and step (6) MUST be omitted. + * Request cancelling is not supported yet. */ class ResourceRequest { @@ -48,20 +48,32 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; + /// Request outcome + /// Should be filled during resource consumption + bool successful; + /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; + /// Timestamps for introspection + ResourceNs enqueue_ns; + ResourceNs execute_ns; + ResourceNs finish_ns; + explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } - /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; + successful = true; constraint = nullptr; + enqueue_ns = 0; + execute_ns = 0; + finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -71,12 +83,6 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; - - /// Stop resource consumption and notify resource scheduler. - /// Should be called when resource consumption is finished by consumer. - /// ResourceRequest should not be destructed or reset before calling to `finish()`. - /// WARNING: this function MUST not be called if request was canceled. 
- void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index ab3f702a422..3a23a8df834 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,27 +145,22 @@ public: std::pair dequeueRequest() override { - while (true) - { - if (current == nullptr) // No active resources - return {nullptr, false}; + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); + assert(request != nullptr); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - if (request == nullptr) // Possible in case of request cancel, just retry - continue; - - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; - } + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; } bool isActive() override @@ -250,6 +245,7 @@ private: void execute(ResourceRequest * request) { + request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index 633bac5d285..ba07d44dbf9 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,9 +30,7 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, - {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, - {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. 
Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -95,9 +93,7 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); - res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); - res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; From 64a80f10116488113f22ddbdb8fcb1151220bf55 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Feb 2024 20:33:39 +0100 Subject: [PATCH 142/145] Fix default path when path is not specified in config (#59654) * Update Server.cpp * Update SentryWriter.cpp * Update Keeper.cpp * Update SentryWriter.cpp --- programs/keeper/Keeper.cpp | 2 +- programs/server/Server.cpp | 2 +- src/Daemon/SentryWriter.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 5b844e7d650..8972c82eab8 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -560,7 +560,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", ""), + config().getString("path", KEEPER_DEFAULT_PATH), std::move(unused_cache), unused_event, [&](ConfigurationPtr config, bool /* initial_loading */) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a886ff9bcd0..74fcc7326fc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1292,7 +1292,7 @@ try auto main_config_reloader = std::make_unique( config_path, extra_paths, - config().getString("path", ""), + config().getString("path", DBMS_DEFAULT_PATH), std::move(main_config_zk_node_cache), main_config_zk_changed_event, [&](ConfigurationPtr config, bool initial_loading) diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index ebfd18abeee..192e9952b9a 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -78,7 +78,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) if (enabled) { - server_data_path = config.getString("path", ""); + server_data_path = config.getString("path", DB::DBMS_DEFAULT_PATH); const std::filesystem::path & default_tmp_path = fs::path(config.getString("tmp_path", fs::temp_directory_path())) / "sentry"; const std::string & endpoint = config.getString("send_crash_reports.endpoint"); From 82ba2ebdb246702615a9b564a6d626bb0695ee41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 21 Feb 2024 20:34:56 +0100 Subject: [PATCH 143/145] Revert "Generate a conflict" This reverts commit 5fc28c536cefac8aa0e58f3c2f5b78f2650a2d8f. 
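(Editor's aside on PATCH 142/145 above, the default-path fix: the sketch below is illustrative only and is not code from that patch; in particular, the value shown for DBMS_DEFAULT_PATH is an assumed placeholder. It shows the pattern the patch adopts — falling back to a well-known data path instead of an empty string when "path" is missing from the config, so the server, Keeper, and SentryWriter all agree on where data lives.)

```cpp
// Editor's sketch (not from the patch): the fallback-to-default pattern used in
// Keeper.cpp, Server.cpp and SentryWriter.cpp above. The path value is an assumption.
#include <iostream>
#include <string>

constexpr const char * DBMS_DEFAULT_PATH = "/var/lib/clickhouse/";  // placeholder value

// Stand-in for config().getString(key, default): returns the default when unset.
std::string getPathOrDefault(const std::string & configured)
{
    return configured.empty() ? std::string(DBMS_DEFAULT_PATH) : configured;
}

int main()
{
    std::cout << getPathOrDefault("") << '\n';           // "/var/lib/clickhouse/" instead of ""
    std::cout << getPathOrDefault("/data/ch/") << '\n';  // an explicit config value still wins
}
```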
--- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index dcfe145b04c..433195af9c3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -640,7 +640,7 @@ class IColumn; M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \ M(Bool, describe_include_virtual_columns, false, "If true, virtual columns of table will be included into result of DESCRIBE query", 0) \ M(Bool, describe_compact_output, false, "If true, include only column names and types into result of DESCRIBE query", 0) \ - M(Bool, apply_mutations_on_fly, false, "Generate a conflict", 0) \ + M(Bool, apply_mutations_on_fly, false, "Only available in ClickHouse Cloud", 0) \ M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true nondeterministic function are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ From a4f765cae7bc76d2af9f4bc4ca584b34f0575680 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 22 Feb 2024 09:51:10 +0100 Subject: [PATCH 144/145] Improve performance of SELECTs with active mutations (#59531) * Configure keeper for perf tests Signed-off-by: Azat Khuzhin * Improve performance of SELECTs with active mutations getAlterMutationCommandsForPart() can be a hot path for query execution when there are pending mutations. - LOG_TEST - it does not just check one bool, but actually a bunch of atomics as well. - Return std::vector over std::map (map is not required there) - no change in performance. - Copy only RENAME_COLUMN (since only this mutation is required by AlterConversions). And here are the results:
run|result
-|-
SELECT w/o ALTER|queries: 1565, QPS: 355.259, RPS: 355.259
SELECT w/ ALTER unpatched|queries: 2099, QPS: 220.623, RPS: 220.623
SELECT w/ ALTER and w/o LOG_TEST|queries: 2730, QPS: 235.859, RPS: 235.859
SELECT w/ ALTER and w/o LOG_TEST and w/ RENAME_COLUMN only|queries: 2995, QPS: 290.982, RPS: 290.982
But there is still room for improvement; at least MergeTree engines could implement getStorageSnapshotForQuery().
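To make the RENAME_COLUMN bullet above concrete, here is a minimal, self-contained sketch (an editor's illustration with simplified stand-in types, not code from this patch) of the collection pattern the hunks further down implement: walk pending mutations from newest to oldest, stop at the part's data version, and copy only the commands that AlterConversions can apply on the fly into one flat list instead of a per-version map.

```cpp
// Editor's sketch only: stand-ins for MutationCommand/MutationCommands and the
// per-version mutation registry; shows the newest-to-oldest, RENAME_COLUMN-only copy.
#include <cstdint>
#include <iostream>
#include <map>
#include <ranges>
#include <vector>

enum class MutationType { RENAME_COLUMN, READ_COLUMN, MATERIALIZE_COLUMN };
struct MutationCommand { MutationType type; };
using MutationCommands = std::vector<MutationCommand>;

static bool supportsOnFlyApplication(MutationType t) { return t == MutationType::RENAME_COLUMN; }

MutationCommands pendingCommandsForPart(
    const std::map<uint64_t, MutationCommands> & mutations_by_version,
    uint64_t part_data_version)
{
    MutationCommands result;  // flat vector, newest command first
    for (const auto & [version, commands] : mutations_by_version | std::views::reverse)
    {
        if (version <= part_data_version)
            break;  // older mutations are already materialized in the part
        for (const auto & command : commands | std::views::reverse)
            if (supportsOnFlyApplication(command.type))
                result.push_back(command);
    }
    return result;
}

int main()
{
    std::map<uint64_t, MutationCommands> mutations = {
        {5, {{MutationType::READ_COLUMN}}},
        {7, {{MutationType::RENAME_COLUMN}}},
        {9, {{MutationType::RENAME_COLUMN}, {MutationType::MATERIALIZE_COLUMN}}},
    };
    // The part already contains data up to mutation 5, so only 7 and 9 are pending;
    // of those, only the RENAME_COLUMN commands are returned (newest first).
    auto pending = pendingCommandsForPart(mutations, /*part_data_version=*/5);
    std::cout << pending.size() << '\n';  // prints 2
}
```

The real hunks below apply the same idea to ReplicatedMergeTreeQueue and StorageMergeTree state, under their existing locks.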
Signed-off-by: Azat Khuzhin * Add AlterConversions::supportsMutationCommandType(), flatten vector> * Work around what appears to be a clang static analysis bug --------- Signed-off-by: Azat Khuzhin Co-authored-by: Michael Kolupaev --- src/Storages/MergeTree/AlterConversions.cpp | 5 +++ src/Storages/MergeTree/AlterConversions.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 7 ++-- src/Storages/MergeTree/MergeTreeData.h | 11 +++--- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 22 +++++++----- .../MergeTree/ReplicatedMergeTreeQueue.h | 2 +- src/Storages/StorageMergeTree.cpp | 12 ++++--- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- tests/performance/alter_select.xml | 35 +++++++++++++++++++ tests/performance/scripts/compare.sh | 16 ++++++++- .../zzz-perf-comparison-tweaks-config.xml | 3 -- 13 files changed, 90 insertions(+), 31 deletions(-) create mode 100644 tests/performance/alter_select.xml diff --git a/src/Storages/MergeTree/AlterConversions.cpp b/src/Storages/MergeTree/AlterConversions.cpp index a98cd6d99f9..31f8f17e2c1 100644 --- a/src/Storages/MergeTree/AlterConversions.cpp +++ b/src/Storages/MergeTree/AlterConversions.cpp @@ -9,6 +9,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +bool AlterConversions::supportsMutationCommandType(MutationCommand::Type t) +{ + return t == MutationCommand::Type::RENAME_COLUMN; +} + void AlterConversions::addMutationCommand(const MutationCommand & command) { /// Currently only RENAME_COLUMN is applied on-fly. diff --git a/src/Storages/MergeTree/AlterConversions.h b/src/Storages/MergeTree/AlterConversions.h index 4410b9c56e2..0f857d351dd 100644 --- a/src/Storages/MergeTree/AlterConversions.h +++ b/src/Storages/MergeTree/AlterConversions.h @@ -35,6 +35,8 @@ public: /// Get column old name before rename (lookup by key in rename_map) std::string getColumnOldName(const std::string & new_name) const; + static bool supportsMutationCommandType(MutationCommand::Type); + private: /// Rename map new_name -> old_name. std::vector rename_map; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e14a358745e..2e63701dbdb 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7950,12 +7950,11 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartPtr part) const { - auto commands_map = getAlterMutationCommandsForPart(part); + auto commands = getAlterMutationCommandsForPart(part); auto result = std::make_shared(); - for (const auto & [_, commands] : commands_map) - for (const auto & command : commands) - result->addMutationCommand(command); + for (const auto & command : commands | std::views::reverse) + result->addMutationCommand(command); return result; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1de79ed17ca..b06b3018938 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1356,11 +1356,12 @@ protected: /// mechanisms for parts locking virtual bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const = 0; - /// Return most recent mutations commands for part which weren't applied - /// Used to receive AlterConversions for part and apply them on fly. 
This - /// method has different implementations for replicated and non replicated - /// MergeTree because they store mutations in different way. - virtual std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; + /// Return pending mutations that weren't applied to `part` yet and should be applied on the fly + /// (i.e. when reading from the part). Mutations not supported by AlterConversions + /// (supportsMutationCommandType()) can be omitted. + /// + /// @return list of mutations, in *reverse* order (newest to oldest) + virtual MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const = 0; struct PartBackupEntries { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index e26a36202dd..6b730fbd6eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1789,7 +1789,7 @@ ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zk } -std::map ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const +MutationCommands ReplicatedMergeTreeQueue::getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const { std::unique_lock lock(state_mutex); @@ -1799,9 +1799,8 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); - std::map result; + MutationCommands result; bool seen_all_data_mutations = false; bool seen_all_metadata_mutations = false; @@ -1814,7 +1813,15 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo if (seen_all_data_mutations && seen_all_metadata_mutations) break; - auto alter_version = mutation_status->entry->alter_version; + auto & entry = mutation_status->entry; + + auto add_to_result = [&] { + for (const auto & command : entry->commands | std::views::reverse) + if (AlterConversions::supportsMutationCommandType(command.type)) + result.emplace_back(command); + }; + + auto alter_version = entry->alter_version; if (alter_version != -1) { if (alter_version > storage.getInMemoryMetadataPtr()->getMetadataVersion()) @@ -1822,22 +1829,19 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo /// We take commands with bigger metadata version if (alter_version > part_metadata_version) - result[mutation_version] = mutation_status->entry->commands; + add_to_result(); else seen_all_metadata_mutations = true; } else { if (mutation_version > part_data_version) - result[mutation_version] = mutation_status->entry->commands; + add_to_result(); else seen_all_data_mutations = true; } } - LOG_TEST(log, "Got {} commands for part {} (part data version {}, part metadata version {})", - result.size(), part->name, part_data_version, part_metadata_version); - return result; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 84106565dff..743ca7fc258 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -401,7 +401,7 @@ public: /// Return mutation commands for part which could be not applied to /// it according to part mutation version. Used when we apply alter commands on fly, /// without actual data modification on disk. 
- std::map getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; + MutationCommands getAlterMutationCommandsForPart(const MergeTreeData::DataPartPtr & part) const; /// Mark finished mutations as done. If the function needs to be called again at some later time /// (because some mutations are probably done but we are not sure yet), returns true. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 678535da732..3458bd18ed3 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2394,19 +2394,21 @@ void StorageMergeTree::attachRestoredParts(MutableDataPartsVector && parts) } -std::map StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MutationCommands StorageMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const { std::lock_guard lock(currently_processing_in_background_mutex); UInt64 part_data_version = part->info.getDataVersion(); - std::map result; + MutationCommands result; for (const auto & [mutation_version, entry] : current_mutations_by_version | std::views::reverse) { - if (mutation_version > part_data_version) - result[mutation_version] = entry.commands; - else + if (mutation_version <= part_data_version) break; + + for (const auto & command : entry.commands | std::views::reverse) + if (AlterConversions::supportsMutationCommandType(command.type)) + result.emplace_back(command); } return result; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 359fa1d262d..8c41664b23c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -308,7 +308,7 @@ private: }; protected: - std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; }; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8e1598a1eef..0618737a56d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8957,7 +8957,7 @@ bool StorageReplicatedMergeTree::canUseAdaptiveGranularity() const } -std::map StorageReplicatedMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const +MutationCommands StorageReplicatedMergeTree::getAlterMutationCommandsForPart(const DataPartPtr & part) const { return queue.getAlterMutationCommandsForPart(part); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 79d6d1dce3d..1c2cdb3ec07 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -938,7 +938,7 @@ private: void waitMutationToFinishOnReplicas( const Strings & replicas, const String & mutation_id) const; - std::map getAlterMutationCommandsForPart(const DataPartPtr & part) const override; + MutationCommands getAlterMutationCommandsForPart(const DataPartPtr & part) const override; void startBackgroundMovesIfNeeded() override; diff --git a/tests/performance/alter_select.xml b/tests/performance/alter_select.xml new file mode 100644 index 00000000000..fbbf603dcba --- /dev/null +++ b/tests/performance/alter_select.xml @@ -0,0 +1,35 @@ + + + + engine + + mt + rmt + + + + + create table alter_select_mt (part_id String, col_0 String) engine=MergeTree() partition by part_id order by tuple() settings max_parts_to_merge_at_once=1 + create table alter_select_rmt (part_id String, col_0 String) 
engine=ReplicatedMergeTree('/tables/{{database}}', '{{table}}') partition by part_id order by tuple() settings max_parts_to_merge_at_once=1 + + system stop merges alter_select_{engine} + + + insert into alter_select_{engine} (part_id, col_0) + select toString(number % 5000), 0 from numbers(10000) + settings + max_block_size=1, + max_insert_threads=32, + min_insert_block_size_rows=1, + insert_deduplicate=false, + parts_to_delay_insert=100000, + parts_to_throw_insert=100000 + + alter table alter_select_{engine} drop column col_0 settings alter_sync = 0 + + select count() from alter_select_{engine} format Null settings max_threads=1 + select * from alter_select_{engine} format Null settings max_threads=1 + + drop table alter_select_{engine} + + diff --git a/tests/performance/scripts/compare.sh b/tests/performance/scripts/compare.sh index 39c6854fbf9..9a0fb5b335c 100755 --- a/tests/performance/scripts/compare.sh +++ b/tests/performance/scripts/compare.sh @@ -11,8 +11,14 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # upstream/master LEFT_SERVER_PORT=9001 +LEFT_SERVER_KEEPER_PORT=9181 +LEFT_SERVER_KEEPER_RAFT_PORT=9234 +LEFT_SERVER_INTERSERVER_PORT=9009 # patched version -RIGHT_SERVER_PORT=9002 +RIGHT_SERVER_PORT=19001 +RIGHT_SERVER_KEEPER_PORT=19181 +RIGHT_SERVER_KEEPER_RAFT_PORT=19234 +RIGHT_SERVER_INTERSERVER_PORT=19009 # abort_conf -- abort if some options is not recognized # abort -- abort if something is not right in the env (i.e. per-cpu arenas does not work) @@ -127,6 +133,10 @@ function restart --user_files_path left/db/user_files --top_level_domains_path "$(left_or_right left top_level_domains)" --tcp_port $LEFT_SERVER_PORT + --keeper_server.tcp_port $LEFT_SERVER_KEEPER_PORT + --keeper_server.raft_configuration.server.port $LEFT_SERVER_KEEPER_RAFT_PORT + --zookeeper.node.port $LEFT_SERVER_KEEPER_PORT + --interserver_http_port $LEFT_SERVER_INTERSERVER_PORT ) left/clickhouse-server "${left_server_opts[@]}" &>> left-server-log.log & left_pid=$! @@ -142,6 +152,10 @@ function restart --user_files_path right/db/user_files --top_level_domains_path "$(left_or_right right top_level_domains)" --tcp_port $RIGHT_SERVER_PORT + --keeper_server.tcp_port $RIGHT_SERVER_KEEPER_PORT + --keeper_server.raft_configuration.server.port $RIGHT_SERVER_KEEPER_RAFT_PORT + --zookeeper.node.port $RIGHT_SERVER_KEEPER_PORT + --interserver_http_port $RIGHT_SERVER_INTERSERVER_PORT ) right/clickhouse-server "${right_server_opts[@]}" &>> right-server-log.log & right_pid=$! 
diff --git a/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml b/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml index 292665c4f68..c2bef2b479a 100644 --- a/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/tests/performance/scripts/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -2,10 +2,7 @@ - - - :: From 0c8aab91e69f709bbbcad8c2b00e5e6d03d140ad Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Feb 2024 14:32:30 +0100 Subject: [PATCH 145/145] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index bbbb09bfd68..180456fffe2 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,4 @@ -00223_shard_distributed_aggregation_memory_efficien +00223_shard_distributed_aggregation_memory_efficient 00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation