From dd3ea8d31b1b5615bcdd6b671f5e6e86315e8dda Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 10 Mar 2023 20:52:27 +0800 Subject: [PATCH 001/985] feat: modify materalized view query, check inner table structure --- src/Storages/StorageMaterializedView.cpp | 13 +++++++++++++ ...erialized_view_query_has_inner_table.reference | 2 ++ ...er_materialized_view_query_has_inner_table.sql | 15 +++++++++++++++ 3 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.reference create mode 100644 tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.sql diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index ae3fa62b38c..0bbd689043c 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,18 @@ void StorageMaterializedView::alter( DatabaseCatalog::instance().updateViewDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); new_metadata.setSelectQuery(new_select); + + /// check materialized view inner table structure + if (has_inner_table) + { + const Block & block = InterpreterSelectWithUnionQuery::getSampleBlock(new_select.select_query, local_context); + for (const auto & col : block.getColumnsWithTypeAndName()) + { + if (!tryGetTargetTable()->getInMemoryMetadata().columns.has(col.name)) + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "column {} is not in materialized view inner table", col.name); + } + } + } /// end modify query diff --git a/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.reference b/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.sql b/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.sql new file mode 100644 index 00000000000..73bbac59a95 --- /dev/null +++ b/tests/queries/0_stateless/25340_alter_materialized_view_query_has_inner_table.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS src_table; +DROP TABLE IF EXISTS mv; + +CREATE TABLE src_table (`a` UInt32, `b` UInt32) ENGINE = MergeTree ORDER BY a; +CREATE MATERIALIZED VIEW mv UUID '2bad6d75-86fe-4da0-815b-2c7410253941' (`a` UInt32) ENGINE = MergeTree ORDER BY a AS SELECT a FROM src_table; + +INSERT INTO src_table (a, b) VALUES (1, 1), (2, 2); + +SELECT * FROM mv; + +SET allow_experimental_alter_materialized_view_structure = 1; +ALTER TABLE mv MODIFY QUERY SELECT a, b FROM src_table; -- {serverError QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW} + +DROP TABLE src_table; +DROP TABLE mv; \ No newline at end of file From 3f5853b970dd205465a5593d5786c1f8a4d82cc7 Mon Sep 17 00:00:00 2001 From: AN Date: Fri, 27 Oct 2023 19:17:13 +0300 Subject: [PATCH 002/985] Update index.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Punctuation fixes, узел→сервер as suggested by alexei-milovidov at https://github.com/ClickHouse/ClickHouse/pull/56040#issuecomment-1783155867, консистентность → согласованность (standard translation instead of calque) --- docs/ru/index.md | 18 +++++++++--------- 1 file changed, 9 
insertions(+), 9 deletions(-) diff --git a/docs/ru/index.md b/docs/ru/index.md index 78bb382753b..a9a666b18db 100644 --- a/docs/ru/index.md +++ b/docs/ru/index.md @@ -41,7 +41,7 @@ ClickHouse — столбцовая система управления база Разный порядок хранения данных лучше подходит для разных сценариев работы. Сценарий работы с данными — это то, какие производятся запросы, как часто и в каком соотношении; сколько читается данных на запросы каждого вида — строк, столбцов, байтов; как соотносятся чтения и обновления данных; какой рабочий размер данных и насколько локально он используется; используются ли транзакции и с какой изолированностью; какие требования к дублированию данных и логической целостности; требования к задержкам на выполнение и пропускной способности запросов каждого вида и т. п. -Чем больше нагрузка на систему, тем более важной становится специализация под сценарий работы, и тем более конкретной становится эта специализация. Не существует системы, одинаково хорошо подходящей под существенно различные сценарии работы. Если система подходит под широкое множество сценариев работы, то при достаточно большой нагрузке, система будет справляться со всеми сценариями работы плохо, или справляться хорошо только с одним из сценариев работы. +Чем больше нагрузка на систему, тем более важной становится специализация под сценарий работы, и тем более конкретной становится эта специализация. Не существует системы, одинаково хорошо подходящей под существенно различные сценарии работы. Если система подходит под широкое множество сценариев работы, то при достаточно большой нагрузке система будет справляться со всеми сценариями работы плохо, или справляться хорошо только с одним из сценариев работы. ## Ключевые особенности OLAP-сценария работы {#kliuchevye-osobennosti-olap-stsenariia-raboty} @@ -53,11 +53,11 @@ ClickHouse — столбцовая система управления база - запросы идут сравнительно редко (обычно не более сотни в секунду на сервер); - при выполнении простых запросов, допустимы задержки в районе 50 мс; - значения в столбцах достаточно мелкие — числа и небольшие строки (например, 60 байт на URL); -- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один узел); +- требуется высокая пропускная способность при обработке одного запроса (до миллиардов строк в секунду на один сервер); - транзакции отсутствуют; -- низкие требования к консистентности данных; -- в запросе одна большая таблица, все таблицы кроме одной маленькие; -- результат выполнения запроса существенно меньше исходных данных — то есть данные фильтруются или агрегируются; результат выполнения помещается в оперативную память одного узла. +- низкие требования к согласованности данных; +- в запросе одна большая таблица, все остальные таблицы из запроса — маленькие; +- результат выполнения запроса существенно меньше исходных данных — то есть данные фильтруются или агрегируются; результат выполнения помещается в оперативную память одного сервера. Легко видеть, что OLAP-сценарий работы существенно отличается от других распространённых сценариев работы (например, OLTP или Key-Value сценариев работы). Таким образом, не имеет никакого смысла пытаться использовать OLTP-системы или системы класса «ключ — значение» для обработки аналитических запросов, если вы хотите получить приличную производительность («выше плинтуса»). Например, если вы попытаетесь использовать для аналитики MongoDB или Redis — вы получите анекдотически низкую производительность по сравнению с OLAP-СУБД. 
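For example, a typical query in this scenario is a wide-table aggregation that reads only a couple of columns. The table and column names below are illustrative, loosely modelled on the web-analytics examples used elsewhere in these docs:

``` sql
-- Typical OLAP-style query: reads two columns of a wide fact table and aggregates
SELECT AdvEngineID, count() AS c
FROM hits
GROUP BY AdvEngineID
ORDER BY c DESC
LIMIT 20;
```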
@@ -77,11 +77,11 @@ ClickHouse — столбцовая система управления база ### По вводу-выводу {#po-vvodu-vyvodu} -1. Для выполнения аналитического запроса, требуется прочитать небольшое количество столбцов таблицы. В столбцовой БД для этого можно читать только нужные данные. Например, если вам требуется только 5 столбцов из 100, то следует рассчитывать на 20-кратное уменьшение ввода-вывода. -2. Так как данные читаются пачками, то их проще сжимать. Данные, лежащие по столбцам также лучше сжимаются. За счёт этого, дополнительно уменьшается объём ввода-вывода. -3. За счёт уменьшения ввода-вывода, больше данных влезает в системный кэш. +1. Для выполнения аналитического запроса требуется прочитать небольшое количество столбцов таблицы. В столбцовой БД для этого можно читать только нужные данные. Например, если вам требуется только 5 столбцов из 100, то следует рассчитывать на 20-кратное уменьшение ввода-вывода. +2. Так как данные читаются пачками, то их проще сжимать. Данные, лежащие по столбцам, также лучше сжимаются. За счёт этого, дополнительно уменьшается объём ввода-вывода. +3. За счёт уменьшения ввода-вывода больше данных влезает в системный кэш. -Например, для запроса «посчитать количество записей для каждой рекламной системы», требуется прочитать один столбец «идентификатор рекламной системы», который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия, возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть, такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике, такая скорость действительно достигается. +Например, для запроса «посчитать количество записей для каждой рекламной системы» требуется прочитать один столбец «идентификатор рекламной системы», который занимает 1 байт в несжатом виде. Если большинство переходов было не с рекламных систем, то можно рассчитывать хотя бы на десятикратное сжатие этого столбца. При использовании быстрого алгоритма сжатия возможно разжатие данных со скоростью более нескольких гигабайт несжатых данных в секунду. То есть такой запрос может выполняться со скоростью около нескольких миллиардов строк в секунду на одном сервере. На практике такая скорость действительно достигается. ### По вычислениям {#po-vychisleniiam} From 4dcbd6775a9cd1afe3c8be96e3c68c397ae547f0 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Fri, 3 Nov 2023 09:46:35 -0600 Subject: [PATCH 003/985] Update kill.md Added additional examples and context for killing queries and mutations --- docs/en/sql-reference/statements/kill.md | 64 ++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 294724dfa50..32de7a41e72 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -21,6 +21,35 @@ The queries to terminate are selected from the system.processes table using the Examples: +First, you'll need to get the list of incomplete queries. 
This SQL queries provides them according to those running the longest: + +List from a single ClickHouse node: +``` sql +SELECT + initial_query_id, + query_id, + formatReadableTimeDelta(elapsed) AS time_delta, + query, + * + FROM system.processes + WHERE query ILIKE 'SELECT%' + ORDER BY time_delta DESC; +``` + +List from a ClickHouse cluster: +``` sql +SELECT + initial_query_id, + query_id, + formatReadableTimeDelta(elapsed) AS time_delta, + query, + * + FROM clusterAllReplicas(default, system.processes) + WHERE query ILIKE 'SELECT%' + ORDER BY time_delta DESC; +``` + +Kill the query: ``` sql -- Forcibly terminates all queries with the specified query_id: KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' @@ -44,6 +73,8 @@ A test query (`TEST`) only checks the user’s rights and displays a list of que ## KILL MUTATION +One of the first things to check if a ClickHouse system or service is not running well is for long-running, incomplete mutations. The asynchronous (background) nature of mutations can cause a large queue of them that can then consume all available resources on the service. You may need to either pause all new mutations, INSERTs, and SELECTs and allow the queue of mutations to complete, or else manually kill some of these mutations. + ``` sql KILL MUTATION [ON CLUSTER cluster] WHERE @@ -57,6 +88,39 @@ A test query (`TEST`) only checks the user’s rights and displays a list of mut Examples: +Get a count() of the number of incomplete mutations: + +Count of mutations from a single ClickHouse node: +``` sql +SELECT count(*) +FROM system.mutations +WHERE is_done = 0; +``` + +Count of mutations from a ClickHouse cluster of replicas: +``` sql +SELECT count(*) +FROM clusterAllReplicas('default',system.mutations) +WHERE is_done = 0; +``` + +Query the list of incomplete mutations: + +List of mutations from a single ClickHouse node: +``` sql +SELECT mutation_id,* +FROM system.mutations +WHERE is_done = 0; +``` + +List of mutations from a ClickHouse cluster: +``` sql +SELECT mutation_id,* +FROM clusterAllReplicas('default',system.mutations) +WHERE is_done = 0; +``` + +Kill the mutations as needed: ``` sql -- Cancel and remove all mutations of the single table: KILL MUTATION WHERE database = 'default' AND table = 'table' From 1134af19caeaffcf70cc94146faed346d6af0cf6 Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 7 Nov 2023 22:33:29 -0800 Subject: [PATCH 004/985] [Docs] Fix typo Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/statements/kill.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 32de7a41e72..a7d050e548c 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -21,7 +21,7 @@ The queries to terminate are selected from the system.processes table using the Examples: -First, you'll need to get the list of incomplete queries. This SQL queries provides them according to those running the longest: +First, you'll need to get the list of incomplete queries. 
This SQL query provides them according to those running the longest: List from a single ClickHouse node: ``` sql From 7e0d95e48cb399c047c9756d81b0f76ce67ea57f Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 7 Nov 2023 22:33:43 -0800 Subject: [PATCH 005/985] [Docs] Formatting Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/statements/kill.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index a7d050e548c..57448c4f441 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -88,7 +88,7 @@ A test query (`TEST`) only checks the user’s rights and displays a list of mut Examples: -Get a count() of the number of incomplete mutations: +Get a `count()` of the number of incomplete mutations: Count of mutations from a single ClickHouse node: ``` sql From ec02a2a2c4f4d4a279732df2c2dd61ab8b0cb80a Mon Sep 17 00:00:00 2001 From: Justin de Guzman Date: Tue, 7 Nov 2023 22:36:49 -0800 Subject: [PATCH 006/985] [Docs] Reword for clarity Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/statements/kill.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/kill.md b/docs/en/sql-reference/statements/kill.md index 57448c4f441..c85870fc0c8 100644 --- a/docs/en/sql-reference/statements/kill.md +++ b/docs/en/sql-reference/statements/kill.md @@ -73,7 +73,10 @@ A test query (`TEST`) only checks the user’s rights and displays a list of que ## KILL MUTATION -One of the first things to check if a ClickHouse system or service is not running well is for long-running, incomplete mutations. The asynchronous (background) nature of mutations can cause a large queue of them that can then consume all available resources on the service. You may need to either pause all new mutations, INSERTs, and SELECTs and allow the queue of mutations to complete, or else manually kill some of these mutations. +The presence of long-running or incomplete mutations often indicates that a ClickHouse service is running poorly. The asynchronous nature of mutations can cause them to consume all available resources on a system. You may need to either: + +- Pause all new mutations, `INSERT`s , and `SELECT`s and allow the queue of mutations to complete. +- Or manually kill some of these mutations by sending a `KILLSIG` command. 
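Before killing anything, it is often worth checking why a mutation is stuck. A sketch of such a check, assuming the failure-related columns of `system.mutations` available in recent ClickHouse versions:

``` sql
-- Inspect incomplete mutations together with their last recorded failure, if any
SELECT
    database,
    table,
    mutation_id,
    command,
    parts_to_do,
    latest_fail_reason
FROM system.mutations
WHERE is_done = 0
ORDER BY create_time ASC;
```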
``` sql KILL MUTATION [ON CLUSTER cluster] From 039bb1d599a5262e558b9b4ebd66fd85469afa3c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 22 Jan 2024 20:26:28 +0100 Subject: [PATCH 007/985] fix race on Context::async_insert_queue --- src/Interpreters/Context.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7e89c794712..51cfd302338 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,6 +181,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int CLUSTER_DOESNT_EXIST; + extern const int ABORTED; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -556,7 +557,12 @@ struct ContextSharedPart : boost::noncopyable return; /// Need to flush the async insert queue before shutting down the database catalog - async_insert_queue.reset(); + std::shared_ptr delete_async_insert_queue; + { + std::lock_guard lock(mutex); + delete_async_insert_queue = std::move(async_insert_queue); + } + delete_async_insert_queue.reset(); /// Stop periodic reloading of the configuration files. /// This must be done first because otherwise the reloading may pass a changed config @@ -4838,11 +4844,15 @@ PartUUIDsPtr Context::getIgnoredPartUUIDs() const AsynchronousInsertQueue * Context::getAsynchronousInsertQueue() const { - return shared->async_insert_queue.get(); + std::lock_guard lock(mutex); + if (auto res = shared->async_insert_queue.get()) + return res; + throw Exception(ErrorCodes::ABORTED, "AsynchronousInsertQueue is not initialized yet or has been already shutdown"); } void Context::setAsynchronousInsertQueue(const std::shared_ptr & ptr) { + std::lock_guard lock(mutex); using namespace std::chrono; if (std::chrono::milliseconds(settings.async_insert_busy_timeout_ms) == 0ms) From eb881667638524f182f06e19d699704ce9e86196 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 Jan 2024 00:28:28 +0100 Subject: [PATCH 008/985] Update Context.cpp --- src/Interpreters/Context.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 51cfd302338..217b247c21c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,7 +181,6 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int CLUSTER_DOESNT_EXIST; - extern const int ABORTED; } #define SHUTDOWN(log, desc, ptr, method) do \ @@ -4845,9 +4844,7 @@ PartUUIDsPtr Context::getIgnoredPartUUIDs() const AsynchronousInsertQueue * Context::getAsynchronousInsertQueue() const { std::lock_guard lock(mutex); - if (auto res = shared->async_insert_queue.get()) - return res; - throw Exception(ErrorCodes::ABORTED, "AsynchronousInsertQueue is not initialized yet or has been already shutdown"); + return shared->async_insert_queue.get(); } void Context::setAsynchronousInsertQueue(const std::shared_ptr & ptr) From 4cfc8d1a34342d44adbc7d9c8c3a4916670d68b2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 23 Jan 2024 00:30:42 +0100 Subject: [PATCH 009/985] better method name --- src/Interpreters/Context.cpp | 2 +- src/Interpreters/Context.h | 2 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Interpreters/executeQuery.cpp | 2 +- src/Server/TCPHandler.cpp | 2 +- src/Storages/System/StorageSystemAsynchronousInserts.cpp | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git 
a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 217b247c21c..ab42e6b0ec9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4841,7 +4841,7 @@ PartUUIDsPtr Context::getIgnoredPartUUIDs() const return ignored_part_uuids; } -AsynchronousInsertQueue * Context::getAsynchronousInsertQueue() const +AsynchronousInsertQueue * Context::tryGetAsynchronousInsertQueue() const { std::lock_guard lock(mutex); return shared->async_insert_queue.get(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 65566876a80..6180bfbde88 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1203,7 +1203,7 @@ public: PartUUIDsPtr getPartUUIDs() const; PartUUIDsPtr getIgnoredPartUUIDs() const; - AsynchronousInsertQueue * getAsynchronousInsertQueue() const; + AsynchronousInsertQueue * tryGetAsynchronousInsertQueue() const; void setAsynchronousInsertQueue(const std::shared_ptr & ptr); ReadTaskCallback getReadTaskCallback() const; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1712c9608bf..f478b43049f 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -692,7 +692,7 @@ BlockIO InterpreterSystemQuery::execute() case Type::FLUSH_ASYNC_INSERT_QUEUE: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); - auto * queue = getContext()->getAsynchronousInsertQueue(); + auto * queue = getContext()->tryGetAsynchronousInsertQueue(); if (!queue) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot flush asynchronous insert queue because it is not initialized"); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 4b5a6a84e17..a84c957d9a8 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -924,7 +924,7 @@ static std::tuple executeQueryImpl( std::unique_ptr interpreter; bool async_insert = false; - auto * queue = context->getAsynchronousInsertQueue(); + auto * queue = context->tryGetAsynchronousInsertQueue(); auto * logger = &Poco::Logger::get("executeQuery"); if (insert_query && async_insert_enabled) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index fa7206eeaac..9bc6c3872fd 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -907,7 +907,7 @@ void TCPHandler::processInsertQuery() Block processed_block; const auto & settings = query_context->getSettingsRef(); - auto * insert_queue = query_context->getAsynchronousInsertQueue(); + auto * insert_queue = query_context->tryGetAsynchronousInsertQueue(); const auto & insert_query = assert_cast(*state.parsed_query); bool async_insert_enabled = settings.async_insert; diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 20ba4d1cdfb..b480821f8ea 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -34,7 +34,7 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co { using namespace std::chrono; - auto * insert_queue = context->getAsynchronousInsertQueue(); + auto * insert_queue = context->tryGetAsynchronousInsertQueue(); if (!insert_queue) return; From f91feb0dcb405df80f317f456372c7374f2c75ee Mon Sep 17 00:00:00 2001 From: Daniil Ivanik Date: Tue, 30 Jan 2024 14:17:11 +0100 Subject: [PATCH 010/985] Initial working commit --- 
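Note on intended usage: the patches starting here add a PostgreSQL-style `generate_series` table function (inclusive bounds, optional step, result column named `generate_series`). A rough sketch derived from the `TableFunctionGenerateSeries` code in this patch — the reference output of the test added later in the series is not reproduced here:

``` sql
SELECT * FROM generate_series(1, 10);     -- 1, 2, ..., 10
SELECT * FROM generate_series(0, 10, 3);  -- 0, 3, 6, 9 (third argument is the step)
SELECT * FROM generate_series(5, 1);      -- empty result, since start > stop
```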
.../QueryPlan/ReadFromSystemNumbersStep.cpp | 210 ++++++++++-------- src/Storages/SelectQueryInfo.h | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 8 +- src/Storages/System/StorageSystemNumbers.h | 36 +-- src/Storages/System/attachSystemTables.cpp | 5 +- src/TableFunctions/CMakeLists.txt | 2 +- src/TableFunctions/ITableFunction.cpp | 2 +- src/TableFunctions/TableFunctionNumbers.cpp | 2 +- .../TableFunctionsGenerateSeries.cpp | 100 +++++++++ src/TableFunctions/registerTableFunctions.cpp | 1 + src/TableFunctions/registerTableFunctions.h | 1 + 11 files changed, 234 insertions(+), 135 deletions(-) create mode 100644 src/TableFunctions/TableFunctionsGenerateSeries.cpp diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 5173b18c6bf..13a14ffb917 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -26,41 +26,59 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_) - : ISource(createHeader()), block_size(block_size_), next(offset_), step(step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_, const std::string& column_name, UInt64 inner_step_) + : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), step(step_), inner_step(inner_step_), inner_remainder(offset_ % inner_step_) { } String getName() const override { return "Numbers"; } - static Block createHeader() { return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "number")}; } + static Block createHeader(const std::string& column_name) { return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)}; } protected: Chunk generate() override { - auto column = ColumnUInt64::create(block_size); - ColumnUInt64::Container & vec = column->getData(); UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. + UInt64 first_element = (curr / inner_step) * inner_step + inner_remainder; + if (first_element < curr) { + first_element += inner_step; + } + UInt64 filtered_block_size = 0; + if (first_element - curr >= block_size) { + auto column = ColumnUInt64::create(0); + return {Columns{std::move(column)}, filtered_block_size}; + } + if (first_element - curr < block_size) { + filtered_block_size = (block_size - (first_element - curr) - 1) / inner_step + 1; + } + + auto column = ColumnUInt64::create(filtered_block_size); + ColumnUInt64::Container & vec = column->getData(); UInt64 * pos = vec.data(); /// This also accelerates the code. 
- UInt64 * end = &vec[block_size]; - iota(pos, static_cast(end - pos), curr); + UInt64 * end = &vec[filtered_block_size]; + iota(pos, static_cast(end - pos), UInt64{0}); + for (UInt64 p = 0; p < filtered_block_size; p += 1) { + vec[p] = vec[p] * inner_step + first_element; + } next += step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, block_size}; + return {Columns{std::move(column)}, filtered_block_size}; } private: UInt64 block_size; UInt64 next; UInt64 step; + UInt64 inner_step; + UInt64 inner_remainder; }; -UInt128 sizeOfRange(const Range & r) +[[maybe_unused]] UInt128 sizeOfRange(const Range & r) { UInt128 size; if (r.right.isPositiveInfinity()) @@ -77,7 +95,7 @@ UInt128 sizeOfRange(const Range & r) return size; }; -auto sizeOfRanges(const Ranges & rs) +[[maybe_unused]] auto sizeOfRanges(const Ranges & rs) { UInt128 total_size{}; for (const Range & r : rs) @@ -91,7 +109,7 @@ auto sizeOfRanges(const Ranges & rs) /// Generate numbers according to ranges. /// Numbers generated is ordered in one stream. /// Notice that we will not generate additional numbers out of ranges. -class NumbersRangedSource : public ISource +class [[maybe_unused]] NumbersRangedSource : public ISource { public: /// Represent a position in Ranges list. @@ -109,8 +127,8 @@ public: using RangesStatePtr = std::shared_ptr; - NumbersRangedSource(const Ranges & ranges_, RangesStatePtr & ranges_state_, UInt64 base_block_size_) - : ISource(NumbersSource::createHeader()), ranges(ranges_), ranges_state(ranges_state_), base_block_size(base_block_size_) + [[maybe_unused]] NumbersRangedSource(const Ranges & ranges_, RangesStatePtr & ranges_state_, UInt64 base_block_size_, const std::string& column_name) + : ISource(NumbersSource::createHeader(column_name)), ranges(ranges_), ranges_state(ranges_state_), base_block_size(base_block_size_) { } @@ -273,7 +291,7 @@ private: namespace { /// Whether we should push limit down to scan. -bool shouldPushdownLimit(SelectQueryInfo & query_info, UInt64 limit_length) +[[maybe_unused]] bool shouldPushdownLimit(SelectQueryInfo & query_info, UInt64 limit_length) { const auto & query = query_info.query->as(); /// Just ignore some minor cases, such as: @@ -286,7 +304,7 @@ bool shouldPushdownLimit(SelectQueryInfo & query_info, UInt64 limit_length) /// Shrink ranges to size. 
/// For example: ranges: [1, 5], [8, 100]; size: 7, we will get [1, 5], [8, 9] -void shrinkRanges(Ranges & ranges, size_t size) +[[maybe_unused]] void shrinkRanges(Ranges & ranges, size_t size) { size_t last_range_idx = 0; for (size_t i = 0; i < ranges.size(); i++) @@ -375,107 +393,107 @@ Pipe ReadFromSystemNumbersStep::makePipe() num_streams = 1; /// Build rpn of query filters - KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); + // KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); Pipe pipe; Ranges ranges; - if (condition.extractPlainRanges(ranges)) - { - /// Intersect ranges with table range - std::optional table_range; - std::optional overflowed_table_range; + // if (condition.extractPlainRanges(ranges)) + // { + // /// Intersect ranges with table range + // std::optional table_range; + // std::optional overflowed_table_range; - if (numbers_storage.limit.has_value()) - { - if (std::numeric_limits::max() - numbers_storage.offset >= *(numbers_storage.limit)) - { - table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); - } - /// UInt64 overflow, for example: SELECT number FROM numbers(18446744073709551614, 5) - else - { - table_range.emplace(FieldRef(numbers_storage.offset), true, std::numeric_limits::max(), true); - auto overflow_end = UInt128(numbers_storage.offset) + UInt128(*numbers_storage.limit); - overflowed_table_range.emplace( - FieldRef(UInt64(0)), true, FieldRef(UInt64(overflow_end - std::numeric_limits::max() - 1)), false); - } - } - else - { - table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(std::numeric_limits::max()), true); - } + // if (numbers_storage.limit.has_value()) + // { + // if (std::numeric_limits::max() - numbers_storage.offset >= *(numbers_storage.limit)) + // { + // table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); + // } + // /// UInt64 overflow, for example: SELECT number FROM numbers(18446744073709551614, 5) + // else + // { + // table_range.emplace(FieldRef(numbers_storage.offset), true, std::numeric_limits::max(), true); + // auto overflow_end = UInt128(numbers_storage.offset) + UInt128(*numbers_storage.limit); + // overflowed_table_range.emplace( + // FieldRef(UInt64(0)), true, FieldRef(UInt64(overflow_end - std::numeric_limits::max() - 1)), false); + // } + // } + // else + // { + // table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(std::numeric_limits::max()), true); + // } - Ranges intersected_ranges; - for (auto & r : ranges) - { - auto intersected_range = table_range->intersectWith(r); - if (intersected_range) - intersected_ranges.push_back(*intersected_range); - } - /// intersection with overflowed_table_range goes back. - if (overflowed_table_range.has_value()) - { - for (auto & r : ranges) - { - auto intersected_range = overflowed_table_range->intersectWith(r); - if (intersected_range) - intersected_ranges.push_back(*overflowed_table_range); - } - } + // Ranges intersected_ranges; + // for (auto & r : ranges) + // { + // auto intersected_range = table_range->intersectWith(r); + // if (intersected_range) + // intersected_ranges.push_back(*intersected_range); + // } + // /// intersection with overflowed_table_range goes back. 
+ // if (overflowed_table_range.has_value()) + // { + // for (auto & r : ranges) + // { + // auto intersected_range = overflowed_table_range->intersectWith(r); + // if (intersected_range) + // intersected_ranges.push_back(*overflowed_table_range); + // } + // } - /// ranges is blank, return a source who has no data - if (intersected_ranges.empty()) - { - pipe.addSource(std::make_shared(NumbersSource::createHeader())); - return pipe; - } - const auto & limit_length = limit_length_and_offset.first; - const auto & limit_offset = limit_length_and_offset.second; + // /// ranges is blank, return a source who has no data + // if (intersected_ranges.empty()) + // { + // pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); + // return pipe; + // } + // const auto & limit_length = limit_length_and_offset.first; + // const auto & limit_offset = limit_length_and_offset.second; - /// If intersected ranges is limited or we can pushdown limit. - if (!intersected_ranges.rbegin()->right.isPositiveInfinity() || should_pushdown_limit) - { - UInt128 total_size = sizeOfRanges(intersected_ranges); - UInt128 query_limit = limit_length + limit_offset; + // /// If intersected ranges is limited or we can pushdown limit. + // if (!intersected_ranges.rbegin()->right.isPositiveInfinity() || should_pushdown_limit) + // { + // UInt128 total_size = sizeOfRanges(intersected_ranges); + // UInt128 query_limit = limit_length + limit_offset; - /// limit total_size by query_limit - if (should_pushdown_limit && query_limit < total_size) - { - total_size = query_limit; - /// We should shrink intersected_ranges for case: - /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 - shrinkRanges(intersected_ranges, total_size); - } + // /// limit total_size by query_limit + // if (should_pushdown_limit && query_limit < total_size) + // { + // total_size = query_limit; + // /// We should shrink intersected_ranges for case: + // /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 + // shrinkRanges(intersected_ranges, total_size); + // } - checkLimits(size_t(total_size)); + // checkLimits(size_t(total_size)); - if (total_size / max_block_size < num_streams) - num_streams = static_cast(total_size / max_block_size); + // if (total_size / max_block_size < num_streams) + // num_streams = static_cast(total_size / max_block_size); - if (num_streams == 0) - num_streams = 1; + // if (num_streams == 0) + // num_streams = 1; - /// Ranges state, all streams will share the state. - auto ranges_state = std::make_shared(); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared(intersected_ranges, ranges_state, max_block_size); + // /// Ranges state, all streams will share the state. 
+ // auto ranges_state = std::make_shared(); + // for (size_t i = 0; i < num_streams; ++i) + // { + // auto source = std::make_shared(intersected_ranges, ranges_state, max_block_size, numbers_storage.column_name); - if (i == 0) - source->addTotalRowsApprox(total_size); + // if (i == 0) + // source->addTotalRowsApprox(total_size); - pipe.addSource(std::move(source)); - } - return pipe; - } - } + // pipe.addSource(std::move(source)); + // } + // return pipe; + // } + // } /// Fall back to NumbersSource for (size_t i = 0; i < num_streams; ++i) { auto source - = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size); + = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size, numbers_storage.column_name, numbers_storage.step); if (numbers_storage.limit && i == 0) { diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 662a5c0ef5a..2b4afaa6345 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -229,4 +229,4 @@ struct SelectQueryInfo bool isFinal() const; }; -} + } diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index b100be7cdf4..cd7207917a9 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -11,15 +11,16 @@ #include #include #include +#include namespace DB { -StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, std::optional limit_, UInt64 offset_) - : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_) +StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, const std::string& column_name_, std::optional limit_, UInt64 offset_, UInt64 step_) + : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_), column_name(column_name_), step(step_) { StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(ColumnsDescription({{"number", std::make_shared()}})); + storage_metadata.setColumns(ColumnsDescription({{column_name_, std::make_shared()}})); setInMemoryMetadata(storage_metadata); } @@ -33,6 +34,7 @@ void StorageSystemNumbers::read( size_t max_block_size, size_t num_streams) { + // LOG_DEBUG(&Poco::Logger::get("Reading from SystemNumbers"), "Limit : {}", limit.value()); query_plan.addStep(std::make_unique( column_names, shared_from_this(), storage_snapshot, query_info, std::move(context), max_block_size, num_streams)); } diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index fe6227db406..ffe87b8ad14 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -10,39 +10,11 @@ namespace DB class Context; - -/** Implements a table engine for the system table "numbers". - * The table contains the only column number UInt64. - * From this table, you can read all natural numbers, starting from 0 (to 2^64 - 1, and then again). - * - * You could also specify a limit (how many numbers to give). - * - * How to generate numbers? - * - * 1. First try a smart fashion: - * - * In this fashion we try to push filters and limit down to scanning. - * Firstly extract plain ranges(no overlapping and ordered) by filter expressions. 
- * - * For example: - * where (numbers > 1 and numbers < 3) or (numbers in (4, 6)) or (numbers > 7 and numbers < 9) - * - * We will get ranges - * (1, 3), [4, 4], [6, 6], (7, 9) - * - * Then split the ranges evenly to one or multi-streams. With this way we will get result without large scanning. - * - * 2. If fail to extract plain ranges, fall back to ordinary scanning. - * - * If multithreaded is specified, numbers will be generated in several streams - * (and result could be out of order). If both multithreaded and limit are specified, - * the table could give you not exactly 1..limit range, but some arbitrary 'limit' numbers. - */ -class StorageSystemNumbers final : public IStorage +class StorageSystemNumbers final : public IStorage { public: /// Otherwise, streams concurrently increment atomic. - StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, std::optional limit_ = std::nullopt, UInt64 offset_ = 0); + StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, const std::string& column_name, std::optional limit_ = std::nullopt, UInt64 offset_ = 0, UInt64 step_ = 1); std::string getName() const override { return "SystemNumbers"; } @@ -67,6 +39,10 @@ private: bool multithreaded; std::optional limit; UInt64 offset; + std::string column_name; + + UInt64 step; + }; } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index bf898f57833..ddd89709b6a 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -118,8 +118,9 @@ namespace DB void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, bool has_zookeeper) { attach(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); - attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false); - attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true); + attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false, "number"); + attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true, "number"); + // attach(context, system_database, "generate_series", "Multithreaded version of `system.numbers`. 
Numbers order is not guaranteed.", false, "generate_series"); attach(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); attach(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index 770990cc405..c5c2a660935 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -29,7 +29,7 @@ if (TARGET ch_contrib::azure_sdk) target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::azure_sdk) endif () -if (TARGET ch_contrib::simdjson) +if (TARGET ch_co`trib::simdjson) target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::simdjson) endif () diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index 137e1dc27fe..c854b6b0f9c 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -5,7 +5,7 @@ #include #include #include - +#include namespace ProfileEvents { diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 262018f108c..71a9ba097c6 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -63,7 +63,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_f UInt64 offset = arguments.size() == 2 ? evaluateArgument(context, arguments[0]) : 0; UInt64 length = arguments.size() == 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); - auto res = std::make_shared(StorageID(getDatabaseName(), table_name), multithreaded, length, offset); + auto res = std::make_shared(StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset); res->startup(); return res; } diff --git a/src/TableFunctions/TableFunctionsGenerateSeries.cpp b/src/TableFunctions/TableFunctionsGenerateSeries.cpp new file mode 100644 index 00000000000..3941f1eadb2 --- /dev/null +++ b/src/TableFunctions/TableFunctionsGenerateSeries.cpp @@ -0,0 +1,100 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "registerTableFunctions.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +namespace +{ + +/* numbers(limit), numbers_mt(limit) + * - the same as SELECT number FROM system.numbers LIMIT limit. + * Used for testing purposes, as a simple example of table function. 
+ */ +class TableFunctionGenerateSeries : public ITableFunction +{ +public: + static constexpr auto name = "generate_series"; + std::string getName() const override { return name; } + bool hasStaticStructure() const override { return true; } +private: + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; + const char * getStorageTypeName() const override { return "SystemNumbers"; } + + UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; + + ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; +}; + +ColumnsDescription TableFunctionGenerateSeries::getActualTableStructure(ContextPtr /*context*/, bool /*is_insert_query*/) const +{ + /// NOTE: https://bugs.llvm.org/show_bug.cgi?id=47418 + return ColumnsDescription{{{"generate_series", std::make_shared()}}}; +} + +StoragePtr TableFunctionGenerateSeries::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const +{ + if (const auto * function = ast_function->as()) + { + auto arguments = function->arguments->children; + + if (arguments.size() != 2 && arguments.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); + + UInt64 start = evaluateArgument(context, arguments[0]); + UInt64 stop = evaluateArgument(context, arguments[1]); + UInt64 interval = (arguments.size() == 3) ? evaluateArgument(context, arguments[2]) : UInt64{1}; + if (start > stop) { + auto res = std::make_shared(StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, 0); + res->startup(); + return res; + } + + auto res = std::make_shared(StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, (stop - start) / interval + 1, start, interval); + res->startup(); + return res; + } + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'limit' or 'offset, limit'.", getName()); +} + +UInt64 TableFunctionGenerateSeries::evaluateArgument(ContextPtr context, ASTPtr & argument) const +{ + const auto & [field, type] = evaluateConstantExpression(argument, context); + + if (!isNativeNumber(type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} expression, must be numeric type", type->getName()); + + Field converted = convertFieldToType(field, DataTypeUInt64()); + if (converted.isNull()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field)); + + return converted.safeGet(); +} + + +} + +void registerTableFunctionGenerateSeries(TableFunctionFactory & factory) +{ + factory.registerFunction({.documentation = {}, .allow_readonly = true}); + // factory.registerFunction({.documentation = {}, .allow_readonly = true}); +} + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 8c18c298f45..1631fa8e879 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -11,6 +11,7 @@ void registerTableFunctions() registerTableFunctionMerge(factory); registerTableFunctionRemote(factory); registerTableFunctionNumbers(factory); + registerTableFunctionGenerateSeries(factory); registerTableFunctionNull(factory); 
registerTableFunctionZeros(factory); registerTableFunctionExecutable(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index fae763e7dc8..111fbe8c22f 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -8,6 +8,7 @@ class TableFunctionFactory; void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionRemote(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); +void registerTableFunctionGenerateSeries(TableFunctionFactory & factory); void registerTableFunctionNull(TableFunctionFactory & factory); void registerTableFunctionZeros(TableFunctionFactory & factory); void registerTableFunctionExecutable(TableFunctionFactory & factory); From 3f0cfbd8c0816b007ff85b1a3997696ce5ed3214 Mon Sep 17 00:00:00 2001 From: Daniil Ivanik Date: Sat, 3 Feb 2024 19:46:00 +0100 Subject: [PATCH 011/985] Kek --- src/Common/iota.cpp | 29 ++ src/Common/iota.h | 9 + .../QueryPlan/ReadFromSystemNumbersStep.cpp | 281 ++++++++++-------- 3 files changed, 197 insertions(+), 122 deletions(-) diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp index 98f18eb195b..532c4bde76d 100644 --- a/src/Common/iota.cpp +++ b/src/Common/iota.cpp @@ -27,10 +27,39 @@ void iota(T * begin, size_t count, T first_value) return iotaImpl(begin, count, first_value); } +MULTITARGET_FUNCTION_AVX2_SSE42( + MULTITARGET_FUNCTION_HEADER(template void NO_INLINE), + iotaWithStepImpl, MULTITARGET_FUNCTION_BODY((T * begin, size_t count, T first_value, T step) /// NOLINT + { + for (size_t i = 0; i < count; i++) + *(begin + i) = static_cast(first_value + i * step); + }) +) + +template +void iota_with_step(T * begin, size_t count, T first_value, T step) +{ +#if USE_MULTITARGET_CODE + if (isArchSupported(TargetArch::AVX2)) + return iotaWithStepImplAVX2(begin, count, first_value, step); + + if (isArchSupported(TargetArch::SSE42)) + return iotaWithStepImplSSE42(begin, count, first_value, step); +#endif + return iotaWithStepImpl(begin, count, first_value, step); +} + template void iota(UInt8 * begin, size_t count, UInt8 first_value); template void iota(UInt32 * begin, size_t count, UInt32 first_value); template void iota(UInt64 * begin, size_t count, UInt64 first_value); #if defined(OS_DARWIN) template void iota(size_t * begin, size_t count, size_t first_value); #endif + +template void iota_with_step(UInt8 * begin, size_t count, UInt8 first_value, UInt8 step); +template void iota_with_step(UInt32 * begin, size_t count, UInt32 first_value, UInt32 step); +template void iota_with_step(UInt64 * begin, size_t count, UInt64 first_value, UInt64 step); +#if defined(OS_DARWIN) +extern template void iota_with_step(size_t * begin, size_t count, size_t first_value, size_t step); +#endif } diff --git a/src/Common/iota.h b/src/Common/iota.h index 7910274d15d..f40cde9d5db 100644 --- a/src/Common/iota.h +++ b/src/Common/iota.h @@ -31,4 +31,13 @@ extern template void iota(UInt64 * begin, size_t count, UInt64 first_value); #if defined(OS_DARWIN) extern template void iota(size_t * begin, size_t count, size_t first_value); #endif + +template void iota_with_step(T * begin, size_t count, T first_value, T step); + +extern template void iota_with_step(UInt8 * begin, size_t count, UInt8 first_value, UInt8 step); +extern template void iota_with_step(UInt32 * begin, size_t count, UInt32 first_value, UInt32 step); +extern template void iota_with_step(UInt64 * begin, size_t count, UInt64 
first_value, UInt64 step); +#if defined(OS_DARWIN) +extern template void iota(size_t * begin, size_t count, size_t first_value, size_t step); +#endif } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 13a14ffb917..f85473e43c3 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -12,6 +12,8 @@ #include #include +#include + namespace DB { @@ -30,9 +32,9 @@ public: : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), step(step_), inner_step(inner_step_), inner_remainder(offset_ % inner_step_) { } - String getName() const override { return "Numbers"; } + static Block createHeader(const std::string& column_name) { return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)}; } protected: @@ -57,10 +59,7 @@ protected: ColumnUInt64::Container & vec = column->getData(); UInt64 * pos = vec.data(); /// This also accelerates the code. UInt64 * end = &vec[filtered_block_size]; - iota(pos, static_cast(end - pos), UInt64{0}); - for (UInt64 p = 0; p < filtered_block_size; p += 1) { - vec[p] = vec[p] * inner_step + first_element; - } + iota_with_step(pos, static_cast(end - pos), first_element, inner_step); next += step; @@ -77,28 +76,46 @@ private: UInt64 inner_remainder; }; - -[[maybe_unused]] UInt128 sizeOfRange(const Range & r) -{ - UInt128 size; - if (r.right.isPositiveInfinity()) - return static_cast(std::numeric_limits::max()) - r.left.get() + r.left_included; - - size = static_cast(r.right.get()) - r.left.get() + 1; - - if (!r.left_included) - size--; - - if (!r.right_included) - size--; - assert(size >= 0); - return size; +struct RangeWithStep { + Range range; + UInt64 step; }; -[[maybe_unused]] auto sizeOfRanges(const Ranges & rs) +using RangesWithStep = std::vector; + +std::optional stepped_range_from_range(const Range& r, UInt64 step, UInt64 remainder) { + UInt64 begin = (r.left.get() / step) * step; + if (begin > std::numeric_limits::max() - remainder) { + return std::nullopt; + } + begin += remainder; + while (begin <= r.left.get() - r.left_included) { + if (std::numeric_limits::max() - step < begin) { + return std::nullopt; + } + begin += step; + } + + LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); + UInt128 right_edge = (r.right.get() + r.right_included); + if (begin >= right_edge) { + return std::nullopt; + } + return std::optional{RangeWithStep{Range(begin, true, static_cast(right_edge - 1), true), step}}; +} + +[[maybe_unused]] UInt128 sizeOfRange(const RangeWithStep & r) +{ + if (r.range.right.isPositiveInfinity()) + return static_cast(std::numeric_limits::max() - r.range.left.get()) / r.step + r.range.left_included; + + return static_cast(r.range.right.get() - r.range.left.get()) / r.step + 1; +}; + +[[maybe_unused]] auto sizeOfRanges(const RangesWithStep & rs) { UInt128 total_size{}; - for (const Range & r : rs) + for (const RangeWithStep & r : rs) { /// total_size will never overflow total_size += sizeOfRange(r); @@ -127,7 +144,7 @@ public: using RangesStatePtr = std::shared_ptr; - [[maybe_unused]] NumbersRangedSource(const Ranges & ranges_, RangesStatePtr & ranges_state_, UInt64 base_block_size_, const std::string& column_name) + [[maybe_unused]] NumbersRangedSource(const RangesWithStep & ranges_, RangesStatePtr & ranges_state_, UInt64 base_block_size_, const std::string& column_name) : ISource(NumbersSource::createHeader(column_name)), 
ranges(ranges_), ranges_state(ranges_state_), base_block_size(base_block_size_) { } @@ -187,9 +204,9 @@ protected: if (ranges.empty()) return {}; - auto first_value = [](const Range & r) { return r.left.get() + (r.left_included ? 0 : 1); }; + auto first_value = [](const RangeWithStep & r) { return r.range.left.get() + (r.range.left_included ? 0 : 1); }; - auto last_value = [](const Range & r) { return r.right.get() - (r.right_included ? 0 : 1); }; + auto last_value = [](const RangeWithStep & r) { return r.range.right.get() - (r.range.right_included ? 0 : 1); }; /// Find the data range. /// If data left is small, shrink block size. @@ -215,31 +232,33 @@ protected: UInt128 can_provide = cursor.offset_in_ranges == end.offset_in_ranges ? end.offset_in_range - cursor.offset_in_range - : static_cast(last_value(range)) - first_value(range) + 1 - cursor.offset_in_range; + : static_cast(last_value(range) - first_value(range)) / range.step + 1 - cursor.offset_in_range; /// set value to block - auto set_value = [&pos](UInt128 & start_value, UInt128 & end_value) + auto set_value = [&pos, this](UInt128 & start_value, UInt128 & end_value) { if (end_value > std::numeric_limits::max()) { - while (start_value < end_value) - *(pos++) = start_value++; + while (start_value < end_value) { + *(pos++) = start_value; + start_value += this->step; + } } else { auto start_value_64 = static_cast(start_value); auto end_value_64 = static_cast(end_value); auto size = end_value_64 - start_value_64; - iota(pos, static_cast(size), start_value_64); + iota_with_step(pos, static_cast(size), start_value_64, step); pos += size; } }; if (can_provide > need) { - UInt64 start_value = first_value(range) + cursor.offset_in_range; + UInt64 start_value = first_value(range) + cursor.offset_in_range * step; /// end_value will never overflow - iota(pos, static_cast(need), start_value); + iota_with_step(pos, static_cast(need), start_value, step); pos += need; provided += need; @@ -248,8 +267,8 @@ protected: else if (can_provide == need) { /// to avoid UInt64 overflow - UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range; - UInt128 end_value = start_value + need; + UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range * step; + UInt128 end_value = start_value + need * step; set_value(start_value, end_value); provided += need; @@ -259,8 +278,8 @@ protected: else { /// to avoid UInt64 overflow - UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range; - UInt128 end_value = start_value + can_provide; + UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range * step; + UInt128 end_value = start_value + can_provide * step; set_value(start_value, end_value); provided += static_cast(can_provide); @@ -277,13 +296,15 @@ protected: private: /// The ranges is shared between all streams. - Ranges ranges; + RangesWithStep ranges; /// Ranges state shared between all streams, actually is the start of the ranges. RangesStatePtr ranges_state; /// Base block size, will shrink when data left is not enough. UInt64 base_block_size; + + UInt64 step; }; } @@ -304,7 +325,7 @@ namespace /// Shrink ranges to size. 
/// For example: ranges: [1, 5], [8, 100]; size: 7, we will get [1, 5], [8, 9] -[[maybe_unused]] void shrinkRanges(Ranges & ranges, size_t size) +[[maybe_unused]] void shrinkRanges(RangesWithStep & ranges, size_t size) { size_t last_range_idx = 0; for (size_t i = 0; i < ranges.size(); i++) @@ -323,9 +344,9 @@ namespace else { auto & range = ranges[i]; - UInt64 right = range.left.get() + static_cast(size); - range.right = Field(right); - range.right_included = !range.left_included; + UInt64 right = range.range.left.get() + static_cast(size); + range.range.right = Field(right); + range.range.right_included = !range.range.left_included; last_range_idx = i; break; } @@ -393,101 +414,117 @@ Pipe ReadFromSystemNumbersStep::makePipe() num_streams = 1; /// Build rpn of query filters - // KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); + KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); Pipe pipe; Ranges ranges; - // if (condition.extractPlainRanges(ranges)) - // { - // /// Intersect ranges with table range - // std::optional table_range; - // std::optional overflowed_table_range; + if (condition.extractPlainRanges(ranges)) + { + LOG_DEBUG(&Poco::Logger::get("My logger"), "Use optimization"); + /// Intersect ranges with table range + std::optional table_range; + std::optional overflowed_table_range; - // if (numbers_storage.limit.has_value()) - // { - // if (std::numeric_limits::max() - numbers_storage.offset >= *(numbers_storage.limit)) - // { - // table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); - // } - // /// UInt64 overflow, for example: SELECT number FROM numbers(18446744073709551614, 5) - // else - // { - // table_range.emplace(FieldRef(numbers_storage.offset), true, std::numeric_limits::max(), true); - // auto overflow_end = UInt128(numbers_storage.offset) + UInt128(*numbers_storage.limit); - // overflowed_table_range.emplace( - // FieldRef(UInt64(0)), true, FieldRef(UInt64(overflow_end - std::numeric_limits::max() - 1)), false); - // } - // } - // else - // { - // table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(std::numeric_limits::max()), true); - // } + if (numbers_storage.limit.has_value()) + { + if (std::numeric_limits::max() - numbers_storage.offset >= *(numbers_storage.limit)) + { + table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); + } + /// UInt64 overflow, for example: SELECT number FROM numbers(18446744073709551614, 5) + else + { + table_range.emplace(FieldRef(numbers_storage.offset), true, std::numeric_limits::max(), true); + auto overflow_end = UInt128(numbers_storage.offset) + UInt128(*numbers_storage.limit); + overflowed_table_range.emplace( + FieldRef(UInt64(0)), true, FieldRef(UInt64(overflow_end - std::numeric_limits::max() - 1)), false); + } + } + else + { + table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(std::numeric_limits::max()), true); + } + LOG_DEBUG(&Poco::Logger::get("My logger"), "Found table ranges"); - // Ranges intersected_ranges; - // for (auto & r : ranges) - // { - // auto intersected_range = table_range->intersectWith(r); - // if (intersected_range) - // intersected_ranges.push_back(*intersected_range); - // } - // /// intersection with overflowed_table_range goes back. 
- // if (overflowed_table_range.has_value()) - // { - // for (auto & r : ranges) - // { - // auto intersected_range = overflowed_table_range->intersectWith(r); - // if (intersected_range) - // intersected_ranges.push_back(*overflowed_table_range); - // } - // } + RangesWithStep intersected_ranges; + for (auto & r : ranges) + { + auto intersected_range = table_range->intersectWith(r); + if (intersected_range.has_value()) { + auto range_with_step = stepped_range_from_range(intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); + if (range_with_step.has_value()) { + intersected_ranges.push_back(*range_with_step); + } + } + } - // /// ranges is blank, return a source who has no data - // if (intersected_ranges.empty()) - // { - // pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); - // return pipe; - // } - // const auto & limit_length = limit_length_and_offset.first; - // const auto & limit_offset = limit_length_and_offset.second; - // /// If intersected ranges is limited or we can pushdown limit. - // if (!intersected_ranges.rbegin()->right.isPositiveInfinity() || should_pushdown_limit) - // { - // UInt128 total_size = sizeOfRanges(intersected_ranges); - // UInt128 query_limit = limit_length + limit_offset; + for (const auto& range : intersected_ranges) { + LOG_DEBUG(&Poco::Logger::get("Ranges"), "Left: {}; Right {}, LI: {}, RI: {}, Step: {}", range.range.left.get(), range.range.right.get(), range.range.left_included, range.range.right_included, range.step); + // std::cout << + } + /// intersection with overflowed_table_range goes back. + if (overflowed_table_range.has_value()) + { + for (auto & r : ranges) + { + auto intersected_range = overflowed_table_range->intersectWith(r); + if (intersected_range) { + auto range_with_step = stepped_range_from_range(intersected_range.value(), numbers_storage.step, static_cast((static_cast(numbers_storage.offset) + std::numeric_limits::max() + 1) % numbers_storage.step)); + if (range_with_step) { + intersected_ranges.push_back(*range_with_step); + } + } + } + } - // /// limit total_size by query_limit - // if (should_pushdown_limit && query_limit < total_size) - // { - // total_size = query_limit; - // /// We should shrink intersected_ranges for case: - // /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 - // shrinkRanges(intersected_ranges, total_size); - // } + /// ranges is blank, return a source who has no data + if (intersected_ranges.empty()) + { + pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); + return pipe; + } + const auto & limit_length = limit_length_and_offset.first; + const auto & limit_offset = limit_length_and_offset.second; - // checkLimits(size_t(total_size)); + /// If intersected ranges is limited or we can pushdown limit. 
+ if (!intersected_ranges.rbegin()->range.right.isPositiveInfinity() || should_pushdown_limit) + { + UInt128 total_size = sizeOfRanges(intersected_ranges); + UInt128 query_limit = limit_length + limit_offset; - // if (total_size / max_block_size < num_streams) - // num_streams = static_cast(total_size / max_block_size); + /// limit total_size by query_limit + if (should_pushdown_limit && query_limit < total_size) + { + total_size = query_limit; + /// We should shrink intersected_ranges for case: + /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 + shrinkRanges(intersected_ranges, total_size); + } - // if (num_streams == 0) - // num_streams = 1; + checkLimits(size_t(total_size)); - // /// Ranges state, all streams will share the state. - // auto ranges_state = std::make_shared(); - // for (size_t i = 0; i < num_streams; ++i) - // { - // auto source = std::make_shared(intersected_ranges, ranges_state, max_block_size, numbers_storage.column_name); + if (total_size / max_block_size < num_streams) + num_streams = static_cast(total_size / max_block_size); - // if (i == 0) - // source->addTotalRowsApprox(total_size); + if (num_streams == 0) + num_streams = 1; - // pipe.addSource(std::move(source)); - // } - // return pipe; - // } - // } + /// Ranges state, all streams will share the state. + auto ranges_state = std::make_shared(); + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared(intersected_ranges, ranges_state, max_block_size, numbers_storage.column_name); + + if (i == 0) + source->addTotalRowsApprox(total_size); + + pipe.addSource(std::move(source)); + } + return pipe; + } + } /// Fall back to NumbersSource for (size_t i = 0; i < num_streams; ++i) From 623b42574587073845a76a5e28a502a792ee6662 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 6 Feb 2024 21:34:09 +0000 Subject: [PATCH 012/985] Add feature with the right author name --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 175 +++++++++++++----- src/Storages/SelectQueryInfo.h | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 8 +- src/Storages/System/StorageSystemNumbers.h | 15 +- src/Storages/System/attachSystemTables.cpp | 2 +- src/TableFunctions/CMakeLists.txt | 2 +- src/TableFunctions/ITableFunction.cpp | 2 +- ...es.cpp => TableFunctionGenerateSeries.cpp} | 55 ++++-- src/TableFunctions/TableFunctionNumbers.cpp | 45 +++-- .../02970_generate_series.reference | 28 +++ .../0_stateless/02970_generate_series.sql | 14 ++ 11 files changed, 254 insertions(+), 94 deletions(-) rename src/TableFunctions/{TableFunctionsGenerateSeries.cpp => TableFunctionGenerateSeries.cpp} (65%) create mode 100644 tests/queries/0_stateless/02970_generate_series.reference create mode 100644 tests/queries/0_stateless/02970_generate_series.sql diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index f85473e43c3..4b957778c43 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -28,32 +28,37 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_, const std::string& column_name, UInt64 inner_step_) - : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), step(step_), inner_step(inner_step_), inner_remainder(offset_ % inner_step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_, const std::string & column_name, UInt64 inner_step_) + : ISource(createHeader(column_name)) + 
, block_size(block_size_) + , next(offset_) + , step(step_) + , inner_step(inner_step_) + , inner_remainder(offset_ % inner_step_) { } String getName() const override { return "Numbers"; } - - static Block createHeader(const std::string& column_name) { return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)}; } + static Block createHeader(const std::string & column_name) + { + return {ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), column_name)}; + } protected: Chunk generate() override { - UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 first_element = (curr / inner_step) * inner_step + inner_remainder; - if (first_element < curr) { + if (first_element < curr) first_element += inner_step; - } UInt64 filtered_block_size = 0; - if (first_element - curr >= block_size) { + if (first_element - curr >= block_size) + { auto column = ColumnUInt64::create(0); return {Columns{std::move(column)}, filtered_block_size}; } - if (first_element - curr < block_size) { + if (first_element - curr < block_size) filtered_block_size = (block_size - (first_element - curr) - 1) / inner_step + 1; - } auto column = ColumnUInt64::create(filtered_block_size); ColumnUInt64::Container & vec = column->getData(); @@ -76,32 +81,37 @@ private: UInt64 inner_remainder; }; -struct RangeWithStep { +struct RangeWithStep +{ Range range; UInt64 step; }; using RangesWithStep = std::vector; -std::optional stepped_range_from_range(const Range& r, UInt64 step, UInt64 remainder) { - UInt64 begin = (r.left.get() / step) * step; - if (begin > std::numeric_limits::max() - remainder) { +std::optional stepped_range_from_range(const Range & r, UInt64 step, UInt64 remainder) +{ + if ((r.right.get() == 0) && (!r.right_included)) + return std::nullopt; + UInt64 begin = (r.left.get() / step) * step; + if (begin > std::numeric_limits::max() - remainder) return std::nullopt; - } begin += remainder; - while (begin <= r.left.get() - r.left_included) { - if (std::numeric_limits::max() - step < begin) { + + // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); + // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); + while ((r.left_included <= r.left.get()) && (begin <= r.left.get() - r.left_included)) + { + if (std::numeric_limits::max() - step < begin) return std::nullopt; - } begin += step; } - LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); - UInt128 right_edge = (r.right.get() + r.right_included); - if (begin >= right_edge) { + // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); + if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) return std::nullopt; - } - return std::optional{RangeWithStep{Range(begin, true, static_cast(right_edge - 1), true), step}}; + UInt64 right_edge_included = r.right.get() - (1 - r.right_included); + return std::optional{RangeWithStep{Range(begin, true, right_edge_included, true), step}}; } [[maybe_unused]] UInt128 sizeOfRange(const RangeWithStep & r) @@ -144,8 +154,17 @@ public: using RangesStatePtr = std::shared_ptr; - [[maybe_unused]] NumbersRangedSource(const RangesWithStep & ranges_, RangesStatePtr & ranges_state_, UInt64 base_block_size_, const std::string& column_name) - : ISource(NumbersSource::createHeader(column_name)), ranges(ranges_), ranges_state(ranges_state_), base_block_size(base_block_size_) + [[maybe_unused]] NumbersRangedSource( + const RangesWithStep 
& ranges_, + RangesStatePtr & ranges_state_, + UInt64 base_block_size_, + UInt64 step_, + const std::string & column_name) + : ISource(NumbersSource::createHeader(column_name)) + , ranges(ranges_) + , ranges_state(ranges_state_) + , base_block_size(base_block_size_) + , step(step_) { } @@ -158,6 +177,7 @@ protected: { std::lock_guard lock(ranges_state->mutex); + UInt64 need = base_block_size_; UInt64 size = 0; /// how many item found. @@ -196,6 +216,10 @@ protected: } ranges_state->pos = end; + + LOG_DEBUG(&Poco::Logger::get("Range borders"), "Begin: {} {}", start.offset_in_ranges, static_cast(start.offset_in_range)); + LOG_DEBUG(&Poco::Logger::get("Range borders"), "End: {} {}", end.offset_in_ranges, static_cast(end.offset_in_range)); + return size; } @@ -234,12 +258,19 @@ protected: ? end.offset_in_range - cursor.offset_in_range : static_cast(last_value(range) - first_value(range)) / range.step + 1 - cursor.offset_in_range; + LOG_DEBUG( + &Poco::Logger::get("Generate"), + "Can Provide: {}, Block size: {}", + static_cast(can_provide), + static_cast(block_size)); + /// set value to block auto set_value = [&pos, this](UInt128 & start_value, UInt128 & end_value) { if (end_value > std::numeric_limits::max()) { - while (start_value < end_value) { + while (start_value < end_value) + { *(pos++) = start_value; start_value += this->step; } @@ -248,7 +279,9 @@ protected: { auto start_value_64 = static_cast(start_value); auto end_value_64 = static_cast(end_value); - auto size = end_value_64 - start_value_64; + auto size = (end_value_64 - start_value_64) / this->step; + LOG_DEBUG( + &Poco::Logger::get("Iota"), "Size: {}, Step: {}, Start: {}", static_cast(size), this->step, start_value_64); iota_with_step(pos, static_cast(size), start_value_64, step); pos += size; } @@ -374,7 +407,7 @@ ReadFromSystemNumbersStep::ReadFromSystemNumbersStep( , key_expression{KeyDescription::parse(column_names[0], storage_snapshot->metadata->columns, context).expression} , max_block_size{max_block_size_} , num_streams{num_streams_} - , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) + , limit_length_and_offset(InterpreterSelectQuery::getLimitLengthAndOffset(query_info.query->as(), context)) , should_pushdown_limit(shouldPushdownLimit(query_info, limit_length_and_offset.first)) , limit(query_info.limit) , storage_limits(query_info.storage_limits) @@ -410,14 +443,28 @@ Pipe ReadFromSystemNumbersStep::makePipe() { auto & numbers_storage = storage->as(); + LOG_DEBUG( + &Poco::Logger::get("Parameters"), + "Parameters: Limit: {}, Offset: {} Step: {}", + numbers_storage.limit.value(), + numbers_storage.offset, + numbers_storage.step); + if (!numbers_storage.multithreaded) num_streams = 1; + Pipe pipe; + Ranges ranges; + + if (numbers_storage.limit.has_value() && (numbers_storage.limit.value() == 0)) + { + pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); + return pipe; + } + /// Build rpn of query filters KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); - Pipe pipe; - Ranges ranges; if (condition.extractPlainRanges(ranges)) { @@ -430,14 +477,15 @@ Pipe ReadFromSystemNumbersStep::makePipe() { if (std::numeric_limits::max() - numbers_storage.offset >= *(numbers_storage.limit)) { - table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); + table_range.emplace( + FieldRef(numbers_storage.offset), true, 
FieldRef(numbers_storage.offset + *(numbers_storage.limit)), false); } /// UInt64 overflow, for example: SELECT number FROM numbers(18446744073709551614, 5) else { table_range.emplace(FieldRef(numbers_storage.offset), true, std::numeric_limits::max(), true); auto overflow_end = UInt128(numbers_storage.offset) + UInt128(*numbers_storage.limit); - overflowed_table_range.emplace( + overflowed_table_range.emplace( FieldRef(UInt64(0)), true, FieldRef(UInt64(overflow_end - std::numeric_limits::max() - 1)), false); } } @@ -451,34 +499,59 @@ Pipe ReadFromSystemNumbersStep::makePipe() for (auto & r : ranges) { auto intersected_range = table_range->intersectWith(r); - if (intersected_range.has_value()) { - auto range_with_step = stepped_range_from_range(intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); - if (range_with_step.has_value()) { + if (intersected_range.has_value()) + { + LOG_DEBUG( + &Poco::Logger::get("Ranges"), + "Ranges: {} {} {} {}", + intersected_range->left.get(), + intersected_range->right.get(), + intersected_range->left_included, + intersected_range->right_included); + auto range_with_step = stepped_range_from_range( + intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); + if (range_with_step.has_value()) + { + LOG_DEBUG( + &Poco::Logger::get("Ranges With Step"), + "Ranges: {} {} {} {} {}", + range_with_step->range.left.get(), + range_with_step->range.right.get(), + range_with_step->range.left_included, + range_with_step->range.right_included, + range_with_step->step); intersected_ranges.push_back(*range_with_step); } } } - for (const auto& range : intersected_ranges) { - LOG_DEBUG(&Poco::Logger::get("Ranges"), "Left: {}; Right {}, LI: {}, RI: {}, Step: {}", range.range.left.get(), range.range.right.get(), range.range.left_included, range.range.right_included, range.step); - // std::cout << - } /// intersection with overflowed_table_range goes back. 
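/// Worked example of the overflow case from the comment above (illustrative): for SELECT number FROM numbers(18446744073709551614, 5) with step 1,
/// table_range is [18446744073709551614, 18446744073709551615] and overflowed_table_range is [0, 3),
/// so the wrapped values 0, 1 and 2 are matched against the filter ranges below as well.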
if (overflowed_table_range.has_value()) { for (auto & r : ranges) { auto intersected_range = overflowed_table_range->intersectWith(r); - if (intersected_range) { - auto range_with_step = stepped_range_from_range(intersected_range.value(), numbers_storage.step, static_cast((static_cast(numbers_storage.offset) + std::numeric_limits::max() + 1) % numbers_storage.step)); - if (range_with_step) { + if (intersected_range) + { + auto range_with_step = stepped_range_from_range( + intersected_range.value(), + numbers_storage.step, + static_cast( + (static_cast(numbers_storage.offset) + std::numeric_limits::max() + 1) + % numbers_storage.step)); + if (range_with_step) intersected_ranges.push_back(*range_with_step); - } } } } + // for (const auto& range : intersected_ranges) + // { + // LOG_DEBUG(&Poco::Logger::get("Ranges with step"), "Left: {}; Right {}, LI: {}, RI: {}, Step: {}", range.range.left.get(), range.range.right.get(), range.range.left_included, range.range.right_included, range.step); + // // std::cout << + // } + /// ranges is blank, return a source who has no data if (intersected_ranges.empty()) { @@ -492,6 +565,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() if (!intersected_ranges.rbegin()->range.right.isPositiveInfinity() || should_pushdown_limit) { UInt128 total_size = sizeOfRanges(intersected_ranges); + LOG_DEBUG(&Poco::Logger::get("Total_Size"), "Total Size: {}", static_cast(total_size)); UInt128 query_limit = limit_length + limit_offset; /// limit total_size by query_limit @@ -515,7 +589,8 @@ Pipe ReadFromSystemNumbersStep::makePipe() auto ranges_state = std::make_shared(); for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(intersected_ranges, ranges_state, max_block_size, numbers_storage.column_name); + auto source = std::make_shared( + intersected_ranges, ranges_state, max_block_size, numbers_storage.step, numbers_storage.column_name); if (i == 0) source->addTotalRowsApprox(total_size); @@ -529,12 +604,16 @@ Pipe ReadFromSystemNumbersStep::makePipe() /// Fall back to NumbersSource for (size_t i = 0; i < num_streams; ++i) { - auto source - = std::make_shared(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size, numbers_storage.column_name, numbers_storage.step); + auto source = std::make_shared( + max_block_size, + numbers_storage.offset + i * max_block_size, + num_streams * max_block_size, + numbers_storage.column_name, + numbers_storage.step); if (numbers_storage.limit && i == 0) { - auto rows_appr = *(numbers_storage.limit); + auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; if (limit > 0 && limit < rows_appr) rows_appr = limit; source->addTotalRowsApprox(rows_appr); @@ -546,7 +625,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() if (numbers_storage.limit) { size_t i = 0; - auto storage_limit = *(numbers_storage.limit); + auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1; /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. 
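/// Illustrative check of the formula mentioned above: with storage_limit = 10 and num_streams = 3,
/// the streams receive roughly 3, 3 and 4 rows each, i.e. within one row of a perfectly even split.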
pipe.addSimpleTransform( [&](const Block & header) diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 2b4afaa6345..662a5c0ef5a 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -229,4 +229,4 @@ struct SelectQueryInfo bool isFinal() const; }; - } +} diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index cd7207917a9..4c319ec7105 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -16,7 +16,13 @@ namespace DB { -StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, const std::string& column_name_, std::optional limit_, UInt64 offset_, UInt64 step_) +StorageSystemNumbers::StorageSystemNumbers( + const StorageID & table_id, + bool multithreaded_, + const std::string & column_name_, + std::optional limit_, + UInt64 offset_, + UInt64 step_) : IStorage(table_id), multithreaded(multithreaded_), limit(limit_), offset(offset_), column_name(column_name_), step(step_) { StorageInMemoryMetadata storage_metadata; diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index ffe87b8ad14..9663ee25251 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -10,11 +10,17 @@ namespace DB class Context; -class StorageSystemNumbers final : public IStorage +class StorageSystemNumbers final : public IStorage { public: /// Otherwise, streams concurrently increment atomic. - StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, const std::string& column_name, std::optional limit_ = std::nullopt, UInt64 offset_ = 0, UInt64 step_ = 1); + StorageSystemNumbers( + const StorageID & table_id, + bool multithreaded_, + const std::string & column_name, + std::optional limit_ = std::nullopt, + UInt64 offset_ = 0, + UInt64 step_ = 1); std::string getName() const override { return "SystemNumbers"; } @@ -30,7 +36,6 @@ public: bool hasEvenlyDistributedRead() const override { return true; } bool isSystemStorage() const override { return true; } - bool supportsTransactions() const override { return true; } private: @@ -38,11 +43,9 @@ private: bool multithreaded; std::optional limit; - UInt64 offset; + UInt64 offset;` std::string column_name; - UInt64 step; - }; } diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index ddd89709b6a..9eacb07bd8d 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -120,7 +120,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "one", "This table contains a single row with a single dummy UInt8 column containing the value 0. Used when the table is not specified explicitly, for example in queries like `SELECT 1`."); attach(context, system_database, "numbers", "Generates all natural numbers, starting from 0 (to 2^64 - 1, and then again) in sorted order.", false, "number"); attach(context, system_database, "numbers_mt", "Multithreaded version of `system.numbers`. Numbers order is not guaranteed.", true, "number"); - // attach(context, system_database, "generate_series", "Multithreaded version of `system.numbers`. 
Numbers order is not guaranteed.", false, "generate_series"); + attach(context, system_database, "generate_series", "Generates arithmetic progression of natural numbers in sorted order in a given segment with a given step", false, "generate_series"); attach(context, system_database, "zeros", "Produces unlimited number of non-materialized zeros.", false); attach(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index c5c2a660935..770990cc405 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -29,7 +29,7 @@ if (TARGET ch_contrib::azure_sdk) target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::azure_sdk) endif () -if (TARGET ch_co`trib::simdjson) +if (TARGET ch_contrib::simdjson) target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::simdjson) endif () diff --git a/src/TableFunctions/ITableFunction.cpp b/src/TableFunctions/ITableFunction.cpp index c854b6b0f9c..137e1dc27fe 100644 --- a/src/TableFunctions/ITableFunction.cpp +++ b/src/TableFunctions/ITableFunction.cpp @@ -5,7 +5,7 @@ #include #include #include -#include + namespace ProfileEvents { diff --git a/src/TableFunctions/TableFunctionsGenerateSeries.cpp b/src/TableFunctions/TableFunctionGenerateSeries.cpp similarity index 65% rename from src/TableFunctions/TableFunctionsGenerateSeries.cpp rename to src/TableFunctions/TableFunctionGenerateSeries.cpp index 3941f1eadb2..88d7b0d1a71 100644 --- a/src/TableFunctions/TableFunctionsGenerateSeries.cpp +++ b/src/TableFunctions/TableFunctionGenerateSeries.cpp @@ -1,13 +1,13 @@ +#include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include -#include -#include -#include -#include +#include #include "registerTableFunctions.h" @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int INVALID_SETTING_VALUE; } namespace @@ -33,8 +34,14 @@ public: static constexpr auto name = "generate_series"; std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } + private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemNumbers"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; @@ -48,25 +55,31 @@ ColumnsDescription TableFunctionGenerateSeries::getActualTableStructure(ContextP return ColumnsDescription{{{"generate_series", std::make_shared()}}}; } -StoragePtr TableFunctionGenerateSeries::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const +StoragePtr TableFunctionGenerateSeries::executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { auto arguments = 
function->arguments->children; if (arguments.size() != 2 && arguments.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); UInt64 start = evaluateArgument(context, arguments[0]); UInt64 stop = evaluateArgument(context, arguments[1]); - UInt64 interval = (arguments.size() == 3) ? evaluateArgument(context, arguments[2]) : UInt64{1}; - if (start > stop) { - auto res = std::make_shared(StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, 0); - res->startup(); - return res; - } - - auto res = std::make_shared(StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, (stop - start) / interval + 1, start, interval); + UInt64 step = (arguments.size() == 3) ? evaluateArgument(context, arguments[2]) : UInt64{1}; + if (step == UInt64{0}) + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Table function '{}' requires step to be a positive number", getName()); + auto res = (start > stop) + ? std::make_shared( + StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, 0, 0, 0) + : std::make_shared( + StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, (stop - start) + 1, start, step); res->startup(); return res; } @@ -82,8 +95,10 @@ UInt64 TableFunctionGenerateSeries::evaluateArgument(ContextPtr context, ASTPtr Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", - applyVisitor(FieldVisitorToString(), field)); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The value {} is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field)); return converted.safeGet(); } diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 71a9ba097c6..bcda8dc6a5e 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -1,13 +1,13 @@ +#include +#include +#include +#include +#include +#include #include #include -#include -#include #include -#include -#include -#include -#include -#include +#include #include "registerTableFunctions.h" @@ -16,8 +16,8 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace @@ -34,8 +34,14 @@ public: static constexpr auto name = multithreaded ? 
"numbers_mt" : "numbers"; std::string getName() const override { return name; } bool hasStaticStructure() const override { return true; } + private: - StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns, bool is_insert_query) const override; + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns, + bool is_insert_query) const override; const char * getStorageTypeName() const override { return "SystemNumbers"; } UInt64 evaluateArgument(ContextPtr context, ASTPtr & argument) const; @@ -51,19 +57,26 @@ ColumnsDescription TableFunctionNumbers::getActualTableStructure( } template -StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/, bool /*is_insert_query*/) const +StoragePtr TableFunctionNumbers::executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription /*cached_columns*/, + bool /*is_insert_query*/) const { if (const auto * function = ast_function->as()) { auto arguments = function->arguments->children; if (arguments.size() != 1 && arguments.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); UInt64 offset = arguments.size() == 2 ? evaluateArgument(context, arguments[0]) : 0; UInt64 length = arguments.size() == 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); - auto res = std::make_shared(StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset); + auto res = std::make_shared( + StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset); res->startup(); return res; } @@ -80,8 +93,10 @@ UInt64 TableFunctionNumbers::evaluateArgument(ContextPtr context, Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", - applyVisitor(FieldVisitorToString(), field)); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The value {} is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field)); return converted.safeGet(); } diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference new file mode 100644 index 00000000000..9e6f1db911e --- /dev/null +++ b/tests/queries/0_stateless/02970_generate_series.reference @@ -0,0 +1,28 @@ +0 +1 +4 +8 +500000001 +50000000 +100000001 +0 +10 +13 +16 +19 +7 +17 +27 +37 +47 +57 +67 +77 +17 +22 +27 +32 +37 +42 +47 +52 diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql new file mode 100644 index 00000000000..045f584a622 --- /dev/null +++ b/tests/queries/0_stateless/02970_generate_series.sql @@ -0,0 +1,14 @@ +SELECT count() FROM generate_series(5, 4); +SELECT count() FROM generate_series(0, 0); +SELECT count() FROM generate_series(10, 20, 3); +SELECT count() FROM generate_series(7, 77, 10); +SELECT count() FROM generate_series(0, 1000000000, 2); +SELECT count() FROM generate_series(0, 
999999999, 20); +SELECT count() FROM generate_series(0, 1000000000, 2) WHERE generate_series % 5 == 0; + +SELECT * FROM generate_series(5, 4); +SELECT * FROM generate_series(0, 0); +SELECT * FROM generate_series(10, 20, 3); +SELECT * FROM generate_series(7, 77, 10); +SELECT * FROM generate_series(7, 52, 5) WHERE generate_series >= 13; + From 145e425ddd5707a5852dd3c6ac2672ccbd68e2bd Mon Sep 17 00:00:00 2001 From: divanik Date: Wed, 7 Feb 2024 15:29:45 +0000 Subject: [PATCH 013/985] Added Documentation --- .../table-functions/generate_series.md | 25 +++++++++ .../QueryPlan/ReadFromSystemNumbersStep.cpp | 53 ------------------- src/Storages/System/StorageSystemNumbers.h | 2 +- 3 files changed, 26 insertions(+), 54 deletions(-) create mode 100644 docs/en/sql-reference/table-functions/generate_series.md diff --git a/docs/en/sql-reference/table-functions/generate_series.md b/docs/en/sql-reference/table-functions/generate_series.md new file mode 100644 index 00000000000..de34e10ac76 --- /dev/null +++ b/docs/en/sql-reference/table-functions/generate_series.md @@ -0,0 +1,25 @@ +--- +slug: /en/sql-reference/table-functions/generate_series +sidebar_position: ? +sidebar_label: generate_series +--- + +# generate_series + +`generate_series(START, STOP)` - Returns a table with the single ‘generate_series’ column (UInt64) that contains integers from start to stop inclusively. + +`generate_series(START, STOP, STEP)` - Returns a table with the single ‘generate_series’ column (UInt64) that contains integers from start to stop inclusively with spacing between values given by STEP. + +The following queries return tables with the same content but different column names: + +``` sql +SELECT * FROM numbers(10, 5); +SELECT * FROM generate_series(10, 14); +``` + +And the following queries return tables with the same content but different column names (but the second option is more efficient): + +``` sql +SELECT * FROM numbers(10, 11) WHERE number % 3 == (10 % 3); +SELECT * FROM generate_series(10, 20, 3) ; +``` \ No newline at end of file diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 4b957778c43..3bb2e0cd69d 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -98,8 +98,6 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st return std::nullopt; begin += remainder; - // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); - // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); while ((r.left_included <= r.left.get()) && (begin <= r.left.get() - r.left_included)) { if (std::numeric_limits::max() - step < begin) @@ -107,7 +105,6 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st begin += step; } - // LOG_DEBUG(&Poco::Logger::get("stepped_range_from_range"), "Begin: {}", begin); if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) return std::nullopt; UInt64 right_edge_included = r.right.get() - (1 - r.right_included); @@ -217,9 +214,6 @@ protected: ranges_state->pos = end; - LOG_DEBUG(&Poco::Logger::get("Range borders"), "Begin: {} {}", start.offset_in_ranges, static_cast(start.offset_in_range)); - LOG_DEBUG(&Poco::Logger::get("Range borders"), "End: {} {}", end.offset_in_ranges, static_cast(end.offset_in_range)); - return size; } @@ -258,12 +252,6 @@ protected: ? 
end.offset_in_range - cursor.offset_in_range : static_cast(last_value(range) - first_value(range)) / range.step + 1 - cursor.offset_in_range; - LOG_DEBUG( - &Poco::Logger::get("Generate"), - "Can Provide: {}, Block size: {}", - static_cast(can_provide), - static_cast(block_size)); - /// set value to block auto set_value = [&pos, this](UInt128 & start_value, UInt128 & end_value) { @@ -280,8 +268,6 @@ protected: auto start_value_64 = static_cast(start_value); auto end_value_64 = static_cast(end_value); auto size = (end_value_64 - start_value_64) / this->step; - LOG_DEBUG( - &Poco::Logger::get("Iota"), "Size: {}, Step: {}, Start: {}", static_cast(size), this->step, start_value_64); iota_with_step(pos, static_cast(size), start_value_64, step); pos += size; } @@ -443,13 +429,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() { auto & numbers_storage = storage->as(); - LOG_DEBUG( - &Poco::Logger::get("Parameters"), - "Parameters: Limit: {}, Offset: {} Step: {}", - numbers_storage.limit.value(), - numbers_storage.offset, - numbers_storage.step); - if (!numbers_storage.multithreaded) num_streams = 1; @@ -468,7 +447,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() if (condition.extractPlainRanges(ranges)) { - LOG_DEBUG(&Poco::Logger::get("My logger"), "Use optimization"); /// Intersect ranges with table range std::optional table_range; std::optional overflowed_table_range; @@ -493,36 +471,11 @@ Pipe ReadFromSystemNumbersStep::makePipe() { table_range.emplace(FieldRef(numbers_storage.offset), true, FieldRef(std::numeric_limits::max()), true); } - LOG_DEBUG(&Poco::Logger::get("My logger"), "Found table ranges"); RangesWithStep intersected_ranges; for (auto & r : ranges) { auto intersected_range = table_range->intersectWith(r); - if (intersected_range.has_value()) - { - LOG_DEBUG( - &Poco::Logger::get("Ranges"), - "Ranges: {} {} {} {}", - intersected_range->left.get(), - intersected_range->right.get(), - intersected_range->left_included, - intersected_range->right_included); - auto range_with_step = stepped_range_from_range( - intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); - if (range_with_step.has_value()) - { - LOG_DEBUG( - &Poco::Logger::get("Ranges With Step"), - "Ranges: {} {} {} {} {}", - range_with_step->range.left.get(), - range_with_step->range.right.get(), - range_with_step->range.left_included, - range_with_step->range.right_included, - range_with_step->step); - intersected_ranges.push_back(*range_with_step); - } - } } @@ -546,11 +499,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() } } - // for (const auto& range : intersected_ranges) - // { - // LOG_DEBUG(&Poco::Logger::get("Ranges with step"), "Left: {}; Right {}, LI: {}, RI: {}, Step: {}", range.range.left.get(), range.range.right.get(), range.range.left_included, range.range.right_included, range.step); - // // std::cout << - // } /// ranges is blank, return a source who has no data if (intersected_ranges.empty()) @@ -565,7 +513,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() if (!intersected_ranges.rbegin()->range.right.isPositiveInfinity() || should_pushdown_limit) { UInt128 total_size = sizeOfRanges(intersected_ranges); - LOG_DEBUG(&Poco::Logger::get("Total_Size"), "Total Size: {}", static_cast(total_size)); UInt128 query_limit = limit_length + limit_offset; /// limit total_size by query_limit diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 9663ee25251..298721984b8 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ 
b/src/Storages/System/StorageSystemNumbers.h @@ -43,7 +43,7 @@ private: bool multithreaded; std::optional limit; - UInt64 offset;` + UInt64 offset; std::string column_name; UInt64 step; }; From 03aaedace439f5db6d9a6aaf91a1b2f978b0f6a9 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 9 Feb 2024 12:05:01 +0000 Subject: [PATCH 014/985] Fix bug --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 3bb2e0cd69d..bc14547889b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -435,6 +435,8 @@ Pipe ReadFromSystemNumbersStep::makePipe() Pipe pipe; Ranges ranges; + // LOG_DEBUG(&Poco::Logger::get("parameters"), "Parameters: {} {} {}", numbers_storage.step, numbers_storage.limit.value(), numbers_storage.offset); + if (numbers_storage.limit.has_value() && (numbers_storage.limit.value() == 0)) { pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); @@ -476,6 +478,15 @@ Pipe ReadFromSystemNumbersStep::makePipe() for (auto & r : ranges) { auto intersected_range = table_range->intersectWith(r); + if (intersected_range.has_value()) + { + auto range_with_step = stepped_range_from_range( + intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); + if (range_with_step.has_value()) + { + intersected_ranges.push_back(*range_with_step); + } + } } From 1b2f23247b7f115ba92b9908d224d4e78e8649f4 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 9 Feb 2024 12:28:54 +0000 Subject: [PATCH 015/985] Change documentation --- docs/en/sql-reference/table-functions/generate_series.md | 2 +- src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/sql-reference/table-functions/generate_series.md b/docs/en/sql-reference/table-functions/generate_series.md index de34e10ac76..c5d29369627 100644 --- a/docs/en/sql-reference/table-functions/generate_series.md +++ b/docs/en/sql-reference/table-functions/generate_series.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/table-functions/generate_series -sidebar_position: ? 
+sidebar_position: 146 sidebar_label: generate_series --- diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index bc14547889b..ab2f726aeb5 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -483,9 +483,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() auto range_with_step = stepped_range_from_range( intersected_range.value(), numbers_storage.step, numbers_storage.offset % numbers_storage.step); if (range_with_step.has_value()) - { intersected_ranges.push_back(*range_with_step); - } } } From 79f91003538a71014eb035dca024285f2fbba7d5 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 9 Feb 2024 14:17:25 +0000 Subject: [PATCH 016/985] To pull --- src/TableFunctions/TableFunctionGenerateSeries.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionGenerateSeries.cpp b/src/TableFunctions/TableFunctionGenerateSeries.cpp index 88d7b0d1a71..65c4c4915c2 100644 --- a/src/TableFunctions/TableFunctionGenerateSeries.cpp +++ b/src/TableFunctions/TableFunctionGenerateSeries.cpp @@ -77,7 +77,7 @@ StoragePtr TableFunctionGenerateSeries::executeImpl( throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Table function '{}' requires step to be a positive number", getName()); auto res = (start > stop) ? std::make_shared( - StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, 0, 0, 0) + StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, 0, 0, 1) : std::make_shared( StorageID(getDatabaseName(), table_name), false, std::string{"generate_series"}, (stop - start) + 1, start, step); res->startup(); From f7dbcdd7e7e00d4fb6d30a02ebcb4a3befcd3190 Mon Sep 17 00:00:00 2001 From: divanik Date: Fri, 9 Feb 2024 18:12:24 +0000 Subject: [PATCH 017/985] Made refactoring --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 41 ++++++++++++------- src/Storages/System/StorageSystemNumbers.h | 28 +++++++++++++ 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 3a905a56aa1..2488fa37643 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -9,10 +9,12 @@ #include #include #include +#include #include #include #include +#include "base/types.h" namespace DB { @@ -28,13 +30,13 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_, const std::string & column_name, UInt64 inner_step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_, UInt64 remainder_) : ISource(createHeader(column_name)) , block_size(block_size_) , next(offset_) + , chunk_step(chunk_step_) , step(step_) - , inner_step(inner_step_) - , inner_remainder(offset_ % inner_step_) + , remainder(remainder_) { } String getName() const override { return "Numbers"; } @@ -48,25 +50,33 @@ protected: Chunk generate() override { UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. 
- UInt64 first_element = (curr / inner_step) * inner_step + inner_remainder; - if (first_element < curr) - first_element += inner_step; - UInt64 filtered_block_size = 0; + UInt64 first_element = (curr / step) * step; + if (first_element > std::numeric_limits::max() - remainder) { + auto column = ColumnUInt64::create(0); + return {Columns{std::move(column)}, 0}; + } + first_element += remainder; + if (first_element < curr) { + if (first_element > std::numeric_limits::max() - step) { + auto column = ColumnUInt64::create(0); + return {Columns{std::move(column)}, 0}; + } + first_element += step; + } if (first_element - curr >= block_size) { auto column = ColumnUInt64::create(0); - return {Columns{std::move(column)}, filtered_block_size}; + return {Columns{std::move(column)}, 0}; } - if (first_element - curr < block_size) - filtered_block_size = (block_size - (first_element - curr) - 1) / inner_step + 1; + UInt64 filtered_block_size = (block_size - (first_element - curr) - 1) / step + 1; auto column = ColumnUInt64::create(filtered_block_size); ColumnUInt64::Container & vec = column->getData(); UInt64 * pos = vec.data(); /// This also accelerates the code. UInt64 * end = &vec[filtered_block_size]; - iota_with_step(pos, static_cast(end - pos), first_element, inner_step); + iota_with_step(pos, static_cast(end - pos), first_element, step); - next += step; + next += chunk_step; progress(column->size(), column->byteSize()); @@ -76,9 +86,9 @@ protected: private: UInt64 block_size; UInt64 next; + UInt64 chunk_step; UInt64 step; - UInt64 inner_step; - UInt64 inner_remainder; + UInt64 remainder; }; struct RangeWithStep @@ -565,7 +575,8 @@ Pipe ReadFromSystemNumbersStep::makePipe() numbers_storage.offset + i * max_block_size, num_streams * max_block_size, numbers_storage.column_name, - numbers_storage.step); + numbers_storage.step, + numbers_storage.offset % numbers_storage.step); if (numbers_storage.limit && i == 0) { diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 298721984b8..c698bae4393 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -10,6 +10,34 @@ namespace DB class Context; +/** Implements a table engine for the system table "numbers". + * The table contains the only column number UInt64. + * From this table, you can read all natural numbers, starting from 0 (to 2^64 - 1, and then again). + * + * You could also specify a limit (how many numbers to give). + * + * How to generate numbers? + * + * 1. First try a smart fashion: + * + * In this fashion we try to push filters and limit down to scanning. + * Firstly extract plain ranges(no overlapping and ordered) by filter expressions. + * + * For example: + * where (numbers > 1 and numbers < 3) or (numbers in (4, 6)) or (numbers > 7 and numbers < 9) + * + * We will get ranges + * (1, 3), [4, 4], [6, 6], (7, 9) + * + * Then split the ranges evenly to one or multi-streams. With this way we will get result without large scanning. + * + * 2. If fail to extract plain ranges, fall back to ordinary scanning. + * + * If multithreaded is specified, numbers will be generated in several streams + * (and result could be out of order). If both multithreaded and limit are specified, + * the table could give you not exactly 1..limit range, but some arbitrary 'limit' numbers. 
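+ *
+ * For example, generate_series(10, 20, 3) is backed by this storage with offset = 10, step = 3 and limit = 11,
+ * and yields 10, 13, 16, 19 (see tests/queries/0_stateless/02970_generate_series.sql).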
+ */ + class StorageSystemNumbers final : public IStorage { public: From 696609e7d562d15cfc7a6ffa776785444a97c2e7 Mon Sep 17 00:00:00 2001 From: divanik Date: Sat, 10 Feb 2024 19:59:57 +0000 Subject: [PATCH 018/985] Kek --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 2488fa37643..dc6aebc69c1 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -14,6 +14,7 @@ #include #include +#include "Core/Types.h" #include "base/types.h" namespace DB @@ -51,13 +52,16 @@ protected: { UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 first_element = (curr / step) * step; - if (first_element > std::numeric_limits::max() - remainder) { + if (first_element > std::numeric_limits::max() - remainder) + { auto column = ColumnUInt64::create(0); return {Columns{std::move(column)}, 0}; } first_element += remainder; - if (first_element < curr) { - if (first_element > std::numeric_limits::max() - step) { + if (first_element < curr) + { + if (first_element > std::numeric_limits::max() - step) + { auto column = ColumnUInt64::create(0); return {Columns{std::move(column)}, 0}; } @@ -101,6 +105,8 @@ using RangesWithStep = std::vector; std::optional stepped_range_from_range(const Range & r, UInt64 step, UInt64 remainder) { + // LOG_DEBUG(&Poco::Logger::get("Stepped from range"), + // "stepped from range"); if ((r.right.get() == 0) && (!r.right_included)) return std::nullopt; UInt64 begin = (r.left.get() / step) * step; @@ -126,7 +132,11 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st if (r.range.right.isPositiveInfinity()) return static_cast(std::numeric_limits::max() - r.range.left.get()) / r.step + r.range.left_included; - return static_cast(r.range.right.get() - r.range.left.get()) / r.step + 1; + UInt128 size = static_cast(r.range.right.get() - r.range.left.get()) / r.step; + if (r.range.right_included && (r.range.right.get() % r.step == 0)) { + ++size; + } + return size; }; [[maybe_unused]] auto sizeOfRanges(const RangesWithStep & rs) @@ -173,6 +183,17 @@ public: , base_block_size(base_block_size_) , step(step_) { + // for (const auto& range_with_step : ranges_) { + // // LOG_DEBUG(&Poco::Logger::get("Ranges With Step"), + // // "Ranges: {} {} {} {} {}", + // // range_with_step.range.left.get(), + // // range_with_step.range.right.get(), + // // range_with_step.range.left_included, + // // range_with_step.range.right_included, + // // range_with_step.step); + // // LOG_DEBUG(&Poco::Logger::get("Ranges With Step"), + // // "Step: {}", step); + // } } String getName() const override { return "NumbersRange"; } @@ -241,6 +262,8 @@ protected: RangesPos start, end; auto block_size = findRanges(start, end, base_block_size); + // LOG_DEBUG(&Poco::Logger::get("Found range"), "Evth: {} {} {} {} {} {}", start.offset_in_ranges, static_cast(start.offset_in_range), end.offset_in_ranges, static_cast(end.offset_in_range), base_block_size, block_size); + if (!block_size) return {}; @@ -256,6 +279,11 @@ protected: while (block_size - provided != 0) { UInt64 need = block_size - provided; + // LOG_DEBUG(&Poco::Logger::get("Indices:"), + // "Indices: {} {}, provided: {}", + // ranges.size(), + // cursor.offset_in_ranges, + // provided); auto & range = 
ranges[cursor.offset_in_ranges]; UInt128 can_provide = cursor.offset_in_ranges == end.offset_in_ranges @@ -445,13 +473,15 @@ Pipe ReadFromSystemNumbersStep::makePipe() Pipe pipe; Ranges ranges; - // LOG_DEBUG(&Poco::Logger::get("parameters"), "Parameters: {} {} {}", numbers_storage.step, numbers_storage.limit.value(), numbers_storage.offset); + + // LOG_DEBUG(&Poco::Logger::get("parameters"), "Parameters: {} {} {} {}", numbers_storage.step, numbers_storage.offset, numbers_storage.limit.has_value(), numbers_storage.limit.has_value() ? numbers_storage.limit.value() : UInt64{0}); if (numbers_storage.limit.has_value() && (numbers_storage.limit.value() == 0)) { pipe.addSource(std::make_shared(NumbersSource::createHeader(numbers_storage.column_name))); return pipe; } + chassert(numbers_storage.step != UInt64{0}); /// Build rpn of query filters KeyCondition condition(buildFilterDAG(), context, column_names, key_expression); @@ -575,7 +605,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() numbers_storage.offset + i * max_block_size, num_streams * max_block_size, numbers_storage.column_name, - numbers_storage.step, + numbers_storage.step, numbers_storage.offset % numbers_storage.step); if (numbers_storage.limit && i == 0) From 3ec9f3c4c89dec2f1971979d7d3ae406c1ecd938 Mon Sep 17 00:00:00 2001 From: divanik Date: Sat, 10 Feb 2024 20:06:52 +0000 Subject: [PATCH 019/985] Check foormattign --- src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index dc6aebc69c1..1e6b539ee2e 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -52,15 +52,15 @@ protected: { UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. 
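/// Illustrative arithmetic for the lines below: with step = 10 and remainder = 7, a chunk starting at curr = 23
/// gives first_element = (23 / 10) * 10 + 7 = 27, i.e. the first number >= curr that is congruent to the requested offset modulo step.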
UInt64 first_element = (curr / step) * step; - if (first_element > std::numeric_limits::max() - remainder) + if (first_element > std::numeric_limits::max() - remainder) { auto column = ColumnUInt64::create(0); return {Columns{std::move(column)}, 0}; } first_element += remainder; - if (first_element < curr) + if (first_element < curr) { - if (first_element > std::numeric_limits::max() - step) + if (first_element > std::numeric_limits::max() - step) { auto column = ColumnUInt64::create(0); return {Columns{std::move(column)}, 0}; @@ -133,9 +133,8 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st return static_cast(std::numeric_limits::max() - r.range.left.get()) / r.step + r.range.left_included; UInt128 size = static_cast(r.range.right.get() - r.range.left.get()) / r.step; - if (r.range.right_included && (r.range.right.get() % r.step == 0)) { + if (r.range.right_included && (r.range.right.get() % r.step == 0)) ++size; - } return size; }; From d0456980991c45935fd316ca7dc2bd61cf45e5b9 Mon Sep 17 00:00:00 2001 From: divanik Date: Sat, 10 Feb 2024 23:04:52 +0000 Subject: [PATCH 020/985] It seems to work --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 31 ++++++------------- ...ble_functions_must_be_documented.reference | 1 + 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 1e6b539ee2e..2217e426b02 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -97,7 +97,8 @@ private: struct RangeWithStep { - Range range; + UInt64 left; + UInt64 right; UInt64 step; }; @@ -124,18 +125,12 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) return std::nullopt; UInt64 right_edge_included = r.right.get() - (1 - r.right_included); - return std::optional{RangeWithStep{Range(begin, true, right_edge_included, true), step}}; + return std::optional{RangeWithStep{begin, right_edge_included, step}}; } [[maybe_unused]] UInt128 sizeOfRange(const RangeWithStep & r) { - if (r.range.right.isPositiveInfinity()) - return static_cast(std::numeric_limits::max() - r.range.left.get()) / r.step + r.range.left_included; - - UInt128 size = static_cast(r.range.right.get() - r.range.left.get()) / r.step; - if (r.range.right_included && (r.range.right.get() % r.step == 0)) - ++size; - return size; + return static_cast(r.right - r.left) / r.step + 1; }; [[maybe_unused]] auto sizeOfRanges(const RangesWithStep & rs) @@ -252,10 +247,6 @@ protected: if (ranges.empty()) return {}; - auto first_value = [](const RangeWithStep & r) { return r.range.left.get() + (r.range.left_included ? 0 : 1); }; - - auto last_value = [](const RangeWithStep & r) { return r.range.right.get() - (r.range.right_included ? 0 : 1); }; - /// Find the data range. /// If data left is small, shrink block size. RangesPos start, end; @@ -287,7 +278,7 @@ protected: UInt128 can_provide = cursor.offset_in_ranges == end.offset_in_ranges ? 
end.offset_in_range - cursor.offset_in_range - : static_cast(last_value(range) - first_value(range)) / range.step + 1 - cursor.offset_in_range; + : static_cast(range.right - range.left) / range.step + 1 - cursor.offset_in_range; /// set value to block auto set_value = [&pos, this](UInt128 & start_value, UInt128 & end_value) @@ -312,7 +303,7 @@ protected: if (can_provide > need) { - UInt64 start_value = first_value(range) + cursor.offset_in_range * step; + UInt64 start_value = range.left + cursor.offset_in_range * step; /// end_value will never overflow iota_with_step(pos, static_cast(need), start_value, step); pos += need; @@ -323,7 +314,7 @@ protected: else if (can_provide == need) { /// to avoid UInt64 overflow - UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range * step; + UInt128 start_value = static_cast(range.left) + cursor.offset_in_range * step; UInt128 end_value = start_value + need * step; set_value(start_value, end_value); @@ -334,7 +325,7 @@ protected: else { /// to avoid UInt64 overflow - UInt128 start_value = static_cast(first_value(range)) + cursor.offset_in_range * step; + UInt128 start_value = static_cast(range.left) + cursor.offset_in_range * step; UInt128 end_value = start_value + can_provide * step; set_value(start_value, end_value); @@ -400,9 +391,7 @@ namespace else { auto & range = ranges[i]; - UInt64 right = range.range.left.get() + static_cast(size); - range.range.right = Field(right); - range.range.right_included = !range.range.left_included; + range.right = range.left + static_cast(size) * range.step - 1; last_range_idx = i; break; } @@ -558,7 +547,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() const auto & limit_offset = limit_length_and_offset.second; /// If intersected ranges is limited or we can pushdown limit. - if (!intersected_ranges.rbegin()->range.right.isPositiveInfinity() || should_pushdown_limit) + if (should_pushdown_limit) { UInt128 total_size = sizeOfRanges(intersected_ranges); UInt128 query_limit = limit_length + limit_offset; diff --git a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference index e4040a2d371..1e4f21a6722 100644 --- a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.reference @@ -4,6 +4,7 @@ dictionary executable file generateRandom +generate_series input jdbc merge From 789d3c699c77d7a39f42281d9dc0c61010471242 Mon Sep 17 00:00:00 2001 From: divanik Date: Sat, 10 Feb 2024 23:49:58 +0000 Subject: [PATCH 021/985] Remove bug for mt --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 65 +++++++++---------- 1 file changed, 31 insertions(+), 34 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 2217e426b02..3656a6d31ee 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -547,42 +547,39 @@ Pipe ReadFromSystemNumbersStep::makePipe() const auto & limit_offset = limit_length_and_offset.second; /// If intersected ranges is limited or we can pushdown limit. 
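/// Illustrative example: for SELECT * FROM numbers(1000000000) LIMIT 10 the limit can be pushed down,
/// so total_size is capped at limit_length + limit_offset and only a handful of rows are generated instead of the whole range.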
- if (should_pushdown_limit) + UInt128 total_size = sizeOfRanges(intersected_ranges); + UInt128 query_limit = limit_length + limit_offset; + + /// limit total_size by query_limit + if (should_pushdown_limit && query_limit < total_size) { - UInt128 total_size = sizeOfRanges(intersected_ranges); - UInt128 query_limit = limit_length + limit_offset; - - /// limit total_size by query_limit - if (should_pushdown_limit && query_limit < total_size) - { - total_size = query_limit; - /// We should shrink intersected_ranges for case: - /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 - shrinkRanges(intersected_ranges, total_size); - } - - checkLimits(size_t(total_size)); - - if (total_size / max_block_size < num_streams) - num_streams = static_cast(total_size / max_block_size); - - if (num_streams == 0) - num_streams = 1; - - /// Ranges state, all streams will share the state. - auto ranges_state = std::make_shared(); - for (size_t i = 0; i < num_streams; ++i) - { - auto source = std::make_shared( - intersected_ranges, ranges_state, max_block_size, numbers_storage.step, numbers_storage.column_name); - - if (i == 0) - source->addTotalRowsApprox(total_size); - - pipe.addSource(std::move(source)); - } - return pipe; + total_size = query_limit; + /// We should shrink intersected_ranges for case: + /// intersected_ranges: [1, 4], [7, 100]; query_limit: 2 + shrinkRanges(intersected_ranges, total_size); } + + checkLimits(size_t(total_size)); + + if (total_size / max_block_size < num_streams) + num_streams = static_cast(total_size / max_block_size); + + if (num_streams == 0) + num_streams = 1; + + /// Ranges state, all streams will share the state. + auto ranges_state = std::make_shared(); + for (size_t i = 0; i < num_streams; ++i) + { + auto source = std::make_shared( + intersected_ranges, ranges_state, max_block_size, numbers_storage.step, numbers_storage.column_name); + + if (i == 0) + source->addTotalRowsApprox(total_size); + + pipe.addSource(std::move(source)); + } + return pipe; } /// Fall back to NumbersSource From 0f84f68da77663e2adcce800cceefff5ab019b58 Mon Sep 17 00:00:00 2001 From: divanik Date: Tue, 13 Feb 2024 09:59:39 +0000 Subject: [PATCH 022/985] Simplified code --- src/Common/iota.cpp | 2 +- src/Common/iota.h | 2 +- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 65 +++++-------------- .../02970_generate_series.reference | 1 - .../0_stateless/02970_generate_series.sql | 1 - 5 files changed, 20 insertions(+), 51 deletions(-) diff --git a/src/Common/iota.cpp b/src/Common/iota.cpp index 532c4bde76d..86c9e04bb06 100644 --- a/src/Common/iota.cpp +++ b/src/Common/iota.cpp @@ -60,6 +60,6 @@ template void iota_with_step(UInt8 * begin, size_t count, UInt8 first_value, UIn template void iota_with_step(UInt32 * begin, size_t count, UInt32 first_value, UInt32 step); template void iota_with_step(UInt64 * begin, size_t count, UInt64 first_value, UInt64 step); #if defined(OS_DARWIN) -extern template void iota_with_step(size_t * begin, size_t count, size_t first_value, size_t step); +template void iota_with_step(size_t * begin, size_t count, size_t first_value, size_t step); #endif } diff --git a/src/Common/iota.h b/src/Common/iota.h index f40cde9d5db..8fa18be9769 100644 --- a/src/Common/iota.h +++ b/src/Common/iota.h @@ -38,6 +38,6 @@ extern template void iota_with_step(UInt8 * begin, size_t count, UInt8 first_val extern template void iota_with_step(UInt32 * begin, size_t count, UInt32 first_value, UInt32 step); extern template void iota_with_step(UInt64 * begin, size_t count, UInt64 first_value, 
UInt64 step); #if defined(OS_DARWIN) -extern template void iota(size_t * begin, size_t count, size_t first_value, size_t step); +extern template void iota_with_step(size_t * begin, size_t count, size_t first_value, size_t step); #endif } diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 3656a6d31ee..d69e2b6ca5a 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -15,6 +15,7 @@ #include #include "Core/Types.h" +#include "base/Decimal_fwd.h" #include "base/types.h" namespace DB @@ -31,13 +32,12 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_, UInt64 remainder_) + NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) : ISource(createHeader(column_name)) , block_size(block_size_) , next(offset_) , chunk_step(chunk_step_) , step(step_) - , remainder(remainder_) { } String getName() const override { return "Numbers"; } @@ -50,41 +50,19 @@ public: protected: Chunk generate() override { - UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. - UInt64 first_element = (curr / step) * step; - if (first_element > std::numeric_limits::max() - remainder) - { - auto column = ColumnUInt64::create(0); - return {Columns{std::move(column)}, 0}; - } - first_element += remainder; - if (first_element < curr) - { - if (first_element > std::numeric_limits::max() - step) - { - auto column = ColumnUInt64::create(0); - return {Columns{std::move(column)}, 0}; - } - first_element += step; - } - if (first_element - curr >= block_size) - { - auto column = ColumnUInt64::create(0); - return {Columns{std::move(column)}, 0}; - } - UInt64 filtered_block_size = (block_size - (first_element - curr) - 1) / step + 1; - - auto column = ColumnUInt64::create(filtered_block_size); + auto column = ColumnUInt64::create(block_size); ColumnUInt64::Container & vec = column->getData(); + + UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 * pos = vec.data(); /// This also accelerates the code. 
- UInt64 * end = &vec[filtered_block_size]; - iota_with_step(pos, static_cast(end - pos), first_element, step); + UInt64 * end = &vec[block_size]; + iota_with_step(pos, static_cast(end - pos), curr, step); next += chunk_step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, filtered_block_size}; + return {Columns{std::move(column)}, block_size}; } private: @@ -92,14 +70,13 @@ private: UInt64 next; UInt64 chunk_step; UInt64 step; - UInt64 remainder; }; struct RangeWithStep { UInt64 left; - UInt64 right; UInt64 step; + UInt128 size; }; using RangesWithStep = std::vector; @@ -125,21 +102,16 @@ std::optional stepped_range_from_range(const Range & r, UInt64 st if ((begin >= r.right_included) && (begin - r.right_included >= r.right.get())) return std::nullopt; UInt64 right_edge_included = r.right.get() - (1 - r.right_included); - return std::optional{RangeWithStep{begin, right_edge_included, step}}; + return std::optional{RangeWithStep{begin, step, static_cast(right_edge_included - begin) / step + 1}}; } -[[maybe_unused]] UInt128 sizeOfRange(const RangeWithStep & r) -{ - return static_cast(r.right - r.left) / r.step + 1; -}; - [[maybe_unused]] auto sizeOfRanges(const RangesWithStep & rs) { UInt128 total_size{}; for (const RangeWithStep & r : rs) { /// total_size will never overflow - total_size += sizeOfRange(r); + total_size += r.size; } return total_size; }; @@ -211,7 +183,7 @@ protected: while (need != 0) { UInt128 can_provide = end.offset_in_ranges == ranges.size() ? static_cast(0) - : sizeOfRange(ranges[end.offset_in_ranges]) - end.offset_in_range; + : ranges[end.offset_in_ranges].size - end.offset_in_range; if (can_provide == 0) break; @@ -278,7 +250,7 @@ protected: UInt128 can_provide = cursor.offset_in_ranges == end.offset_in_ranges ? 
end.offset_in_range - cursor.offset_in_range - : static_cast(range.right - range.left) / range.step + 1 - cursor.offset_in_range; + : range.size - cursor.offset_in_range; /// set value to block auto set_value = [&pos, this](UInt128 & start_value, UInt128 & end_value) @@ -377,7 +349,7 @@ namespace size_t last_range_idx = 0; for (size_t i = 0; i < ranges.size(); i++) { - auto range_size = sizeOfRange(ranges[i]); + auto range_size = ranges[i].size; if (range_size < size) { size -= static_cast(range_size); @@ -391,7 +363,7 @@ namespace else { auto & range = ranges[i]; - range.right = range.left + static_cast(size) * range.step - 1; + range.size = static_cast(size); last_range_idx = i; break; } @@ -587,11 +559,10 @@ Pipe ReadFromSystemNumbersStep::makePipe() { auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * max_block_size, - num_streams * max_block_size, + numbers_storage.offset + i * max_block_size * numbers_storage.step, + num_streams * max_block_size * numbers_storage.step, numbers_storage.column_name, - numbers_storage.step, - numbers_storage.offset % numbers_storage.step); + numbers_storage.step); if (numbers_storage.limit && i == 0) { diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference index 9e6f1db911e..4e4f556a39b 100644 --- a/tests/queries/0_stateless/02970_generate_series.reference +++ b/tests/queries/0_stateless/02970_generate_series.reference @@ -4,7 +4,6 @@ 8 500000001 50000000 -100000001 0 10 13 diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql index 045f584a622..a7f89e1bd3f 100644 --- a/tests/queries/0_stateless/02970_generate_series.sql +++ b/tests/queries/0_stateless/02970_generate_series.sql @@ -4,7 +4,6 @@ SELECT count() FROM generate_series(10, 20, 3); SELECT count() FROM generate_series(7, 77, 10); SELECT count() FROM generate_series(0, 1000000000, 2); SELECT count() FROM generate_series(0, 999999999, 20); -SELECT count() FROM generate_series(0, 1000000000, 2) WHERE generate_series % 5 == 0; SELECT * FROM generate_series(5, 4); SELECT * FROM generate_series(0, 0); From d12ecdc5f06689d6259e2ef082a916f8b2f1836f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 13 Feb 2024 12:35:17 +0100 Subject: [PATCH 023/985] Asynchronous WriteBuffer for AzureBlobStorage --- src/Backups/BackupIO_AzureBlobStorage.cpp | 3 +- src/Core/Settings.h | 3 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 116 +++++++++++------- .../IO/WriteBufferFromAzureBlobStorage.h | 22 +++- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 3 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 3 +- .../AzureBlobStorage/AzureObjectStorage.h | 5 +- src/IO/WriteBufferFromS3.cpp | 2 +- src/IO/WriteBufferFromS3.h | 5 +- src/IO/WriteBufferFromS3TaskTracker.cpp | 21 ++-- src/IO/WriteBufferFromS3TaskTracker.h | 8 +- 11 files changed, 124 insertions(+), 67 deletions(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 52ce20d5108..44a72f80456 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -278,7 +278,8 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin settings->max_single_part_upload_size, settings->max_unexpected_write_error_retries, DBMS_DEFAULT_BUFFER_SIZE, - write_settings); + write_settings, + settings->max_inflight_parts_for_one_file); } void BackupWriterAzureBlobStorage::removeFile(const String & file_name) 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 44badfefabb..53de245bdfc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -80,7 +80,8 @@ class IColumn; M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ - M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited. You ", 0) \ + M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ + M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ M(UInt64, azure_max_single_part_upload_size, 100*1024*1024, "The maximum size of object to upload using singlepart upload to Azure blob storage.", 0) \ M(UInt64, azure_max_single_part_copy_size, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage.", 0) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 905114f50e9..cbe2367823d 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -18,13 +18,21 @@ namespace ProfileEvents namespace DB { +struct WriteBufferFromAzureBlobStorage::PartData +{ + Memory<> memory; + size_t data_size = 0; +}; + WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, size_t max_unexpected_write_error_retries_, size_t buf_size_, - const WriteSettings & write_settings_) + const WriteSettings & write_settings_, + size_t max_inflight_parts_for_one_file_, + ThreadPoolCallbackRunner schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) , max_single_part_upload_size(max_single_part_upload_size_) @@ -32,7 +40,13 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( , blob_path(blob_path_) , write_settings(write_settings_) , blob_container_client(blob_container_client_) + , task_tracker( + std::make_unique( + std::move(schedule_), + max_inflight_parts_for_one_file_, + limitedLog)) { + allocateBuffer(); } @@ -79,60 +93,80 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() { execWithRetry([this](){ next(); }, max_unexpected_write_error_retries); - if (tmp_buffer_write_offset > 0) - uploadBlock(tmp_buffer->data(), tmp_buffer_write_offset); + task_tracker->waitAll(); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); - LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); -} - -void WriteBufferFromAzureBlobStorage::uploadBlock(const char * data, size_t size) -{ - 
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); - - Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(data), size); - execWithRetry([&](){ block_blob_client.StageBlock(block_id, memory_stream); }, max_unexpected_write_error_retries, size); - tmp_buffer_write_offset = 0; - - LOG_TRACE(log, "Staged block (id: {}) of size {} (blob path: {}).", block_id, size, blob_path); -} - -WriteBufferFromAzureBlobStorage::MemoryBufferPtr WriteBufferFromAzureBlobStorage::allocateBuffer() const -{ - return std::make_unique>(max_single_part_upload_size); + LOG_DEBUG(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } void WriteBufferFromAzureBlobStorage::nextImpl() { - size_t size_to_upload = offset(); + task_tracker->waitIfAny(); - if (size_to_upload == 0) - return; + reallocateBuffer(); + detachBuffer(); - if (!tmp_buffer) - tmp_buffer = allocateBuffer(); - - size_t uploaded_size = 0; - while (uploaded_size != size_to_upload) + while (!detached_part_data.empty()) { - size_t memory_buffer_remaining_size = max_single_part_upload_size - tmp_buffer_write_offset; - if (memory_buffer_remaining_size == 0) - uploadBlock(tmp_buffer->data(), tmp_buffer->size()); - - size_t size = std::min(memory_buffer_remaining_size, size_to_upload - uploaded_size); - memcpy(tmp_buffer->data() + tmp_buffer_write_offset, working_buffer.begin() + uploaded_size, size); - uploaded_size += size; - tmp_buffer_write_offset += size; + writePart(std::move(detached_part_data.front())); + detached_part_data.pop_front(); } - if (tmp_buffer_write_offset == max_single_part_upload_size) - uploadBlock(tmp_buffer->data(), tmp_buffer->size()); + allocateBuffer(); +} - if (write_settings.remote_throttler) - write_settings.remote_throttler->add(size_to_upload, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); +void WriteBufferFromAzureBlobStorage::allocateBuffer() +{ + memory = Memory(max_single_part_upload_size); + WriteBuffer::set(memory.data(), memory.size()); +} + + +void WriteBufferFromAzureBlobStorage::reallocateBuffer() +{ + chassert(offset() == 0); + + if (available() > 0) + return; + + if (memory.size() == max_single_part_upload_size) + return; + + memory.resize(max_single_part_upload_size); + + WriteBuffer::set(memory.data(), memory.size()); + + chassert(offset() == 0); +} + +void WriteBufferFromAzureBlobStorage::detachBuffer() +{ + size_t data_size = size_t(position() - memory.data()); + auto buf = std::move(memory); + WriteBuffer::set(nullptr, 0); + detached_part_data.push_back({std::move(buf), data_size}); +} + +void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage::PartData && data) +{ + if (data.data_size == 0) + return; + + auto upload_worker = [&] () + { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64)); + + Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(data.memory.data()), data.data_size); + execWithRetry([&](){ block_blob_client.StageBlock(block_id, memory_stream); }, max_unexpected_write_error_retries, data.data_size); + + if (write_settings.remote_throttler) + write_settings.remote_throttler->add(data.data_size, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); + }; + + task_tracker->add(std::move(upload_worker)); } } diff --git 
a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index f105b35c121..2d11014fa2a 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -11,6 +11,7 @@ #include #include #include +#include namespace Poco @@ -21,6 +22,8 @@ class Logger; namespace DB { +class TaskTracker; + class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase { public: @@ -32,7 +35,9 @@ public: size_t max_single_part_upload_size_, size_t max_unexpected_write_error_retries_, size_t buf_size_, - const WriteSettings & write_settings_); + const WriteSettings & write_settings_, + size_t max_inflight_parts_for_one_file_, + ThreadPoolCallbackRunner schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; @@ -42,11 +47,21 @@ public: void sync() override { next(); } private: + struct PartData; + + void writePart(WriteBufferFromAzureBlobStorage::PartData && data); + void detachBuffer(); + void allocateBuffer(); + void allocateFirstBuffer(); + void reallocateFirstBuffer(); + void reallocateBuffer(); + void finalizeImpl() override; void execWithRetry(std::function func, size_t num_tries, size_t cost = 0); void uploadBlock(const char * data, size_t size); LoggerPtr log; + LogSeriesLimiterPtr limitedLog = std::make_shared(log, 1, 5); const size_t max_single_part_upload_size; const size_t max_unexpected_write_error_retries; @@ -61,6 +76,11 @@ private: size_t tmp_buffer_write_offset = 0; MemoryBufferPtr allocateBuffer() const; + + bool first_buffer=true; + + std::unique_ptr task_tracker; + std::deque detached_part_data; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index 72c4abee5c9..f99586b2d1a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -169,7 +169,8 @@ std::unique_ptr getAzureBlobStorageSettings(const Po config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size), config.getBool(config_prefix + ".use_native_copy", false), - config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries) + config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries), + config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file) ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 74389aedb64..844789ea5b5 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -268,7 +268,8 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO settings.get()->max_single_part_upload_size, settings.get()->max_unexpected_write_error_retries, buf_size, - patchSettings(write_settings)); + patchSettings(write_settings), + settings.get()->max_inflight_parts_for_one_file); } /// Remove file. Throws exception if file doesn't exists or it's a directory. 
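For illustration only — a sketch of how the new per-disk knob wired in above could appear in a disk definition for an `azure_blob_storage` disk. The key name and its fallback to the `azure_max_inflight_parts_for_one_file` server setting (default 20, 0 meaning unlimited) come from this patch; the disk name and connection fields are placeholders:

``` xml
<blob_storage_disk>
    <type>azure_blob_storage</type>
    <storage_account_url>http://account.blob.core.windows.net</storage_account_url>
    <container_name>container</container_name>
    <!-- per-disk override; if omitted, the azure_max_inflight_parts_for_one_file server setting (default 20) is used, 0 means unlimited -->
    <max_inflight_parts_for_one_file>20</max_inflight_parts_for_one_file>
</blob_storage_disk>
```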
diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f16c35fb52c..1b473a01304 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -27,7 +27,8 @@ struct AzureObjectStorageSettings size_t max_upload_part_size_, size_t max_single_part_copy_size_, bool use_native_copy_, - size_t max_unexpected_write_error_retries_) + size_t max_unexpected_write_error_retries_, + size_t max_inflight_parts_for_one_file_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , max_single_read_retries(max_single_read_retries_) @@ -37,6 +38,7 @@ struct AzureObjectStorageSettings , max_single_part_copy_size(max_single_part_copy_size_) , use_native_copy(use_native_copy_) , max_unexpected_write_error_retries (max_unexpected_write_error_retries_) + , max_inflight_parts_for_one_file (max_inflight_parts_for_one_file_) { } @@ -52,6 +54,7 @@ struct AzureObjectStorageSettings size_t max_single_part_copy_size = 256 * 1024 * 1024; bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; + size_t max_inflight_parts_for_one_file = 20; }; using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 5bb01050591..6fc0a35672f 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -95,7 +95,7 @@ WriteBufferFromS3::WriteBufferFromS3( , object_metadata(std::move(object_metadata_)) , buffer_allocation_policy(ChooseBufferPolicy(upload_settings)) , task_tracker( - std::make_unique( + std::make_unique( std::move(schedule_), upload_settings.max_inflight_parts_for_one_file, limitedLog)) diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 230f39b074e..f3637122ee4 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -26,6 +27,8 @@ namespace DB * Data is divided on chunks with size greater than 'minimum_upload_part_size'. Last chunk can be less than this threshold. * Each chunk is written as a part to S3. */ +class TaskTracker; + class WriteBufferFromS3 final : public WriteBufferFromFileBase { public: @@ -118,7 +121,7 @@ private: size_t total_size = 0; size_t hidden_size = 0; - class TaskTracker; +// class TaskTracker; std::unique_ptr task_tracker; BlobStorageLogWriterPtr blob_log; diff --git a/src/IO/WriteBufferFromS3TaskTracker.cpp b/src/IO/WriteBufferFromS3TaskTracker.cpp index bce122dd6c8..e62de261fc2 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.cpp +++ b/src/IO/WriteBufferFromS3TaskTracker.cpp @@ -1,7 +1,5 @@ #include "config.h" -#if USE_AWS_S3 - #include namespace ProfileEvents @@ -12,19 +10,19 @@ namespace ProfileEvents namespace DB { -WriteBufferFromS3::TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) +TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) : is_async(bool(scheduler_)) , scheduler(scheduler_ ? 
std::move(scheduler_) : syncRunner()) , max_tasks_inflight(max_tasks_inflight_) , limitedLog(limitedLog_) {} -WriteBufferFromS3::TaskTracker::~TaskTracker() +TaskTracker::~TaskTracker() { safeWaitAll(); } -ThreadPoolCallbackRunner WriteBufferFromS3::TaskTracker::syncRunner() +ThreadPoolCallbackRunner TaskTracker::syncRunner() { return [](Callback && callback, int64_t) mutable -> std::future { @@ -35,7 +33,7 @@ ThreadPoolCallbackRunner WriteBufferFromS3::TaskTracker::syncRunner() }; } -void WriteBufferFromS3::TaskTracker::waitAll() +void TaskTracker::waitAll() { /// Exceptions are propagated for (auto & future : futures) @@ -48,7 +46,7 @@ void WriteBufferFromS3::TaskTracker::waitAll() finished_futures.clear(); } -void WriteBufferFromS3::TaskTracker::safeWaitAll() +void TaskTracker::safeWaitAll() { for (auto & future : futures) { @@ -71,7 +69,7 @@ void WriteBufferFromS3::TaskTracker::safeWaitAll() finished_futures.clear(); } -void WriteBufferFromS3::TaskTracker::waitIfAny() +void TaskTracker::waitIfAny() { if (futures.empty()) return; @@ -99,7 +97,7 @@ void WriteBufferFromS3::TaskTracker::waitIfAny() ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds()); } -void WriteBufferFromS3::TaskTracker::add(Callback && func) +void TaskTracker::add(Callback && func) { /// All this fuzz is about 2 things. This is the most critical place of TaskTracker. /// The first is not to fail insertion in the list `futures`. @@ -134,7 +132,7 @@ void WriteBufferFromS3::TaskTracker::add(Callback && func) waitTilInflightShrink(); } -void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() +void TaskTracker::waitTilInflightShrink() { if (!max_tasks_inflight) return; @@ -166,11 +164,10 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds()); } -bool WriteBufferFromS3::TaskTracker::isAsync() const +bool TaskTracker::isAsync() const { return is_async; } } -#endif diff --git a/src/IO/WriteBufferFromS3TaskTracker.h b/src/IO/WriteBufferFromS3TaskTracker.h index 815e041ae52..134abbbc4c1 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.h +++ b/src/IO/WriteBufferFromS3TaskTracker.h @@ -1,9 +1,7 @@ #pragma once #include "config.h" - -#if USE_AWS_S3 - +#include #include "WriteBufferFromS3.h" #include @@ -22,7 +20,7 @@ namespace DB /// Basic exception safety is provided. If exception occurred the object has to be destroyed. /// No thread safety is provided. Use this object with no concurrency. 
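A minimal usage sketch of the relocated class, for illustration only — it assumes the members declared in this header, and `uploadPart` is a placeholder rather than a real ClickHouse symbol:

```cpp
#include <string>
#include <vector>
#include <IO/WriteBufferFromS3TaskTracker.h>

void uploadPart(const std::string & part); // placeholder for whatever work each task performs

// Drives TaskTracker the way the write buffers above do: schedule work with add(),
// let it throttle once max_tasks_inflight tasks are running, then drain with waitAll().
void uploadAll(const std::vector<std::string> & parts, LogSeriesLimiterPtr limited_log)
{
    // An empty scheduler makes the tracker synchronous: it falls back to syncRunner().
    DB::TaskTracker tracker(/*scheduler=*/{}, /*max_tasks_inflight=*/4, limited_log);

    for (const auto & part : parts)
        tracker.add([&] { uploadPart(part); }); // may block until the number of inflight tasks shrinks below the limit

    tracker.waitAll(); // waits for every task and rethrows any captured exception
}
```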
-class WriteBufferFromS3::TaskTracker +class TaskTracker { public: using Callback = std::function; @@ -68,5 +66,3 @@ private: }; } - -#endif From 26fd3d0d852986b6bbaf595087cb0d06bdff9f93 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 14 Feb 2024 16:13:53 +0100 Subject: [PATCH 024/985] Removed offset check --- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index cbe2367823d..d700090303a 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -126,8 +126,6 @@ void WriteBufferFromAzureBlobStorage::allocateBuffer() void WriteBufferFromAzureBlobStorage::reallocateBuffer() { - chassert(offset() == 0); - if (available() > 0) return; From 750a82a4ff615190a2793c0cfae9f4c1f5c75433 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Feb 2024 13:23:33 +0100 Subject: [PATCH 025/985] Update doc --- .../mergetree-family/mergetree.md | 2 + docs/en/operations/storing-data.md | 146 ++++++++++++++++-- 2 files changed, 134 insertions(+), 14 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index f185c11bab3..e1eef8db9ab 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -1106,6 +1106,8 @@ Configuration markup: ``` +Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage). + :::note cache configuration ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions. ::: diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 003277c8d4f..7a7edfb1a90 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -11,45 +11,163 @@ To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-en To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). -## Configuring HDFS {#configuring-hdfs} +## Configuring external storage {#configuring-external-storage} -[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`. +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. Configuration markup: +Let's take a look at the different storage configuration options, using `S3` storage as an example. +First, define the configuration in the server configuration file. In order to configure `S3` storage, the following configuration can be used: + ``` xml - - hdfs - hdfs://hdfs1:9000/clickhouse/ - + + s3 + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + -
- hdfs + s3
-
+
+
+``` +Starting with 24.1 clickhouse version, a different type of configuration is supported in addition to the older one: + +``` xml + + + + + object_storage + s3 + local + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + + + + + +
+ s3 +
+
+
+
+
+
+``` + +In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file: + +``` xml + - 0 + s3 ``` -Required parameters: +If you want to configure a specific storage policy only to specific table, you can define it in settings while creating the table: -- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data. +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS storage_policy = 's3'; +``` -Optional parameters: +You can also use `disk` instead of `storage_policy`. In this case it is not requires to have `storage_policy` section in configuration file, only `disk` section would be enough. -- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`. +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS disk = 's3'; +``` + +There is also a possibility to specify storage configuration without a preconfigured disk in configuration file: + +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1); +``` + +Adding cache is also possible: + +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1)); +``` + +A combination of config file disk configuration and sql-defined configuration is also possible: + +``` sql +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY a +SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = 's3'); +``` + +Here `s3` is a disk name from server configuration file, while `cache` disk is defined via sql. + +Let's take a closer look at configuration parameters. + +All disk configuration require `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`. Then goes configuration of a specific storage type. +Starting from 24.1 clickhouse version, you can you a new configuration option. For it you are required to specify `type` as `object_storage`, `object_storage_type` as one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`, and optionally you can specify `metadata_type`, which is `local` by default, but it can also be set to `plain`, `web`. + +E.g. first configuration option: +``` xml + + s3 + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +and second (from `24.1`): +``` xml + + object_storage + s3 + local + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Configuration like +``` xml + + s3_plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +is equal to +``` xml + + object_storage + s3 + plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +For details configuration options of each storage see [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md). 
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} From 9bcd4daabe56e29132fc5098420afb4dcba9001d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Feb 2024 16:19:31 +0100 Subject: [PATCH 026/985] Better --- .../mergetree-family/mergetree.md | 294 +------------ docs/en/operations/storing-data.md | 411 +++++++++++++++--- 2 files changed, 346 insertions(+), 359 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index e1eef8db9ab..0fff13c906f 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -987,49 +987,6 @@ ORDER BY (postcode1, postcode2, addr1, addr2) # highlight-end ``` -### Nested Dynamic Storage - -This example query builds on the above dynamic disk configuration and shows how to -use a local disk to cache data from a table stored at a URL. Neither the cache disk -nor the web storage is configured in the ClickHouse configuration files; both are -configured in the CREATE/ATTACH query settings. - -In the settings highlighted below notice that the disk of `type=web` is nested within -the disk of `type=cache`. - -```sql -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=cache, - max_size='1Gi', - path='/var/lib/clickhouse/custom_disk_cache/', - disk=disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ) - ); - # highlight-end -``` - ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -1058,19 +1015,17 @@ During this time, they are not moved to other volumes or disks. Therefore, until User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting. -## Using S3 for Data Storage {#table_engine-mergetree-s3} +## Using External Storage for Data Storage {#table_engine-mergetree-s3} -:::note -Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). -::: +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details. -`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`. +Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`. Configuration markup: ``` xml ... 
- +e s3 true @@ -1112,247 +1067,6 @@ Also see [configuring external storage options](/docs/en/operations/storing-data ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions. ::: -### Configuring the S3 disk - -Required parameters: - -- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. -- `access_key_id` — S3 access key id. -- `secret_access_key` — S3 secret access key. - -Optional parameters: - -- `region` — S3 region name. -- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. -- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. -- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. -- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. -- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. -- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. -- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. -- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. -- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. -- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. -- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. -- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. -- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. -- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. -- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. -- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. -- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). 
-- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. -- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`. -- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. - -### Configuring the cache - -This is the cache configuration from above: -```xml - - cache - s3 - /var/lib/clickhouse/disks/s3_cache/ - 10Gi - -``` - -These parameters define the cache layer: -- `type` — If a disk is of type `cache` it caches mark and index files in memory. -- `disk` — The name of the disk that will be cached. - -Cache parameters: -- `path` — The path where metadata for the cache is stored. -- `max_size` — The size (amount of disk space) that the cache can grow to. - -:::tip -There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details. -::: - -S3 disk can be configured as `main` or `cold` storage: -``` xml - - ... - - - s3 - https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ - your_access_key_id - your_secret_access_key - - - - - -
- s3 -
-
-
- - -
- default -
- - s3 - -
- 0.2 -
-
- ... -
-``` - -In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. - -## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage} - -`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. - -As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented. - -Configuration markup: -``` xml - - ... - - - azure_blob_storage - http://account.blob.core.windows.net - container - account - pass123 - /var/lib/clickhouse/disks/blob_storage_disk/ - /var/lib/clickhouse/disks/blob_storage_disk/cache/ - false - - - ... - -``` - -Connection parameters: -* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`. -* `container_name` - Target container name, defaults to `default-container`. -* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet. - -Authentication parameters (the disk will try all available methods **and** Managed Identity Credential): -* `connection_string` - For authentication using a connection string. -* `account_name` and `account_key` - For authentication using Shared Key. - -Limit parameters (mainly for internal usage): -* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage. -* `min_bytes_for_seek` - Limits the size of a seekable region. -* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. -* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. -* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. -* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. - -Other parameters: -* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. -* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. -* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). - -Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). 
- -:::note Zero-copy replication is not ready for production -Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. -::: - -## HDFS storage {#hdfs-storage} - -In this sample configuration: -- the disk is of type `hdfs` -- the data is hosted at `hdfs://hdfs1:9000/clickhouse/` - -```xml - - - - - hdfs - hdfs://hdfs1:9000/clickhouse/ - true - - - local - / - - - - - -
- hdfs -
- - hdd - -
-
-
-
-
-``` - -## Web storage (read-only) {#web-storage} - -Web storage can be used for read-only purposes. An example use is for hosting sample -data, or for migrating data. - -:::tip -Storage can also be configured temporarily within a query, if a web dataset is not expected -to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the -configuration file. -::: - -In this sample configuration: -- the disk is of type `web` -- the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used - -```xml - - - - - web - http://nginx:80/test1/ - - - cache - web - cached_web_cache/ - 100000000 - - - - - -
- web -
-
-
- - -
- cached_web -
-
-
-
-
-
-``` - ## Virtual Columns {#virtual-columns} - `_part` — Name of a part. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 7a7edfb1a90..baf4e1999a7 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -5,21 +5,68 @@ sidebar_label: "External Disks for Storing Data" title: "External Disks for Storing Data" --- -Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). +Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported: +1. [Amazon S3](https://aws.amazon.com/s3/) object storage. +2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) +3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. - -To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). +Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` famility or `Log` family tables. ## Configuring external storage {#configuring-external-storage} [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. -Configuration markup: +Disk configuration requires: +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +2. Configuration of a specific external storage type. -Let's take a loop at different storage configuration options on the example of `S3` storage. -Firstly, define configuration in server configuration file. In order to configure `S3` storage the following configuration can be used: +Starting from 24.1 clickhouse version, it is possible to use a new configuration option. +It requires to specify: +1. `type` equal to `object_storage` +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. 
+Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`. +E.g. configuration option +``` xml + + s3 + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +is equal to configuration (from `24.1`): +``` xml + + object_storage + s3 + local + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Configuration +``` xml + + s3_plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +is equal to +``` xml + + object_storage + s3 + plain + https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ + 1 + +``` + +Example of full storage configuration will look like: ``` xml @@ -43,8 +90,7 @@ Firstly, define configuration in server configuration file. In order to configur ``` -Starting with 24.1 clickhouse version, a different type of configuration is supported in addition to the older one: - +Starting with 24.1 clickhouse version, it can also look like: ``` xml @@ -71,7 +117,6 @@ Starting with 24.1 clickhouse version, a different type of configuration is supp ``` In order to make a specific kind of storage a default option for all `MergeTree` tables add the following section to configuration file: - ``` xml @@ -96,80 +141,259 @@ ENGINE = MergeTree() ORDER BY a SETTINGS disk = 's3'; ``` -There is also a possibility to specify storage configuration without a preconfigured disk in configuration file: +## Dynamic Configuration {#dynamic-configuration} -``` sql -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a -SETTINGS disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1); +There is also a possibility to specify storage configuration without a predefined disk in configuration in a configuration file, but can be configured in the CREATE/ATTACH query settings. + +The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL. + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end ``` -Adding cache is also possible: +The example below adds cache to external storage. 
-``` sql -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a -SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1)); +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end ``` -A combination of config file disk configuration and sql-defined configuration is also possible: +In the settings highlighted below notice that the disk of `type=web` is nested within +the disk of `type=cache`. -``` sql -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a -SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = 's3'); +A combination of config-based configuration and sql-defined configuration is also possible: + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end ``` -Here `s3` is a disk name from server configuration file, while `cache` disk is defined via sql. +where `web` is a from a server configuration file: -Let's take a closer look at configuration parameters. - -All disk configuration require `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`. Then goes configuration of a specific storage type. -Starting from 24.1 clickhouse version, you can you a new configuration option. For it you are required to specify `type` as `object_storage`, `object_storage_type` as one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`, and optionally you can specify `metadata_type`, which is `local` by default, but it can also be set to `plain`, `web`. - -E.g. 
first configuration option: ``` xml - - s3 - https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 - + + + + web + 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + + + ``` -and second (from `24.1`): +### Using S3 Storage {#s3-storage} + +Required parameters: + +- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. +- `access_key_id` — S3 access key id. +- `secret_access_key` — S3 secret access key. + +Optional parameters: + +- `region` — S3 region name. +- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. +- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. +- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. +- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. +- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. +- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. +- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. +- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. +- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. +- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. +- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. +- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. +- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. 
By default (`0` value) it equals `s3_max_put_rps`.
+- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) it equals `s3_max_get_rps`.
+- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+- `key_template` — Define the format with which the object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of the object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only a subset of the syntax is supported. Check if your preferred format is supported before using that option. The disk isn't initialized if ClickHouse is unable to generate a key by the value of `key_template`. It requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled. It forbids declaring the `root path` in the `endpoint` option. It requires definition of the option `key_compatibility_prefix`.
+- `key_compatibility_prefix` — That option is required when the option `key_template` is in use. In order to be able to read the object keys which were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
+
+:::note
+Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
+:::
+
+### Using Azure Blob Storage {#azure-blob-storage}
+
+`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
+
+As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
+
+Configuration markup:
+``` xml
+<storage_configuration>
+    ...
+    <disks>
+        <blob_storage_disk>
+            <type>azure_blob_storage</type>
+            <storage_account_url>http://account.blob.core.windows.net</storage_account_url>
+            <container_name>container</container_name>
+            <account_name>account</account_name>
+            <account_key>pass123</account_key>
+            <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
+            <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
+            <skip_access_check>false</skip_access_check>
+        </blob_storage_disk>
+    </disks>
+    ...
+</storage_configuration>
+```
+
+Connection parameters:
+* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
+* `container_name` - Target container name, defaults to `default-container`.
+* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet.
+
+Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
+* `connection_string` - For authentication using a connection string.
+* `account_name` and `account_key` - For authentication using Shared Key.
+
+Limit parameters (mainly for internal usage):
+* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
+* `min_bytes_for_seek` - Limits the size of a seekable region.
+* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
+* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
+* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
+* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
+
+Other parameters:
+* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
+* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+
+Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
+
+:::note Zero-copy replication is not ready for production
+Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
+:::
+
+## Using HDFS storage {#hdfs-storage}
+
+In this sample configuration:
+- the disk is of type `hdfs`
+- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <hdfs>
+                <type>hdfs</type>
+                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
+                <skip_access_check>true</skip_access_check>
+            </hdfs>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <hdfs>
+                <volumes>
+                    <main>
+                        <disk>hdfs</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </hdfs>
+        </policies>
+    </storage_configuration>
+</clickhouse>
``` -is equal to -``` xml - - object_storage - s3 - plain - https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 - -``` - -For details configuration options of each storage see [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md). - -## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} +### Using Data Encryption {#encrypted-virtual-file-system} You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. @@ -230,7 +454,7 @@ Example of disk configuration:
```
 
-## Using local cache {#using-local-cache}
+### Using local cache {#using-local-cache}
 
 It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
 
@@ -393,7 +617,56 @@ Cache profile events:
 
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
 
-## Storing Data on Web Server {#storing-data-on-webserver}
+### Using static Web storage (read-only) {#web-storage}
+
+Web storage can be used for read-only purposes. An example use is for hosting sample
+data, or for migrating data.
+
+:::tip
+Storage can also be configured temporarily within a query, if a web dataset is not expected
+to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
+configuration file.
+:::
+
+In this sample configuration:
+- the disk is of type `web`
+- the data is hosted at `http://nginx:80/test1/`
+- a cache on local storage is used
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <web>
+                <type>web</type>
+                <endpoint>http://nginx:80/test1/</endpoint>
+            </web>
+            <cached_web>
+                <type>cache</type>
+                <disk>web</disk>
+                <path>cached_web_cache/</path>
+                <max_size>100000000</max_size>
+            </cached_web>
+        </disks>
+        <policies>
+            <web>
+                <volumes>
+                    <main>
+                        <disk>web</disk>
+                    </main>
+                </volumes>
+            </web>
+            <cached_web>
+                <volumes>
+                    <main>
+                        <disk>cached_web</disk>
+                    </main>
+                </volumes>
+            </cached_web>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+``` There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. @@ -595,7 +868,7 @@ If URL is not reachable on disk load when the server is starting up tables, then Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read. -## Zero-copy Replication (not ready for production) {#zero-copy} +### Zero-copy Replication (not ready for production) {#zero-copy} Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. From 5ae410e6339fe52e33b41bbc9c6c115ac6293f57 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Feb 2024 18:33:38 +0100 Subject: [PATCH 027/985] A bit more explanation --- .../mergetree-family/mergetree.md | 49 +------------------ docs/en/operations/storing-data.md | 44 ++++++++++++++++- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 0fff13c906f..f23b251f3a1 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -940,53 +940,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting. -### Dynamic Storage - -This example query shows how to attach a table stored at a URL and configure the -remote storage within the query. The web storage is not configured in the ClickHouse -configuration files; all the settings are in the CREATE/ATTACH query. - -:::note -The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. -::: - -#### Example dynamic web storage - -:::tip -A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) -::: - -In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. 
- -```sql -# highlight-next-line -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ); - # highlight-end -``` - ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -1025,7 +978,7 @@ Configuration markup: ``` xml ... -e + s3 true diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index baf4e1999a7..0f818b813bf 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -213,6 +213,10 @@ ORDER BY (postcode1, postcode2, addr1, addr2) In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. +::: + A combination of config-based configuration and sql-defined configuration is also possible: ```sql @@ -302,6 +306,11 @@ Optional parameters: Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). ::: +### Using Plain Storage {#s3-storage} + +There is a disk type `s3_plain`, which provides a write-once storage. Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names as clickhouse stores files on local disk. So this disk type allows to keeper a static version of the table and can also be used to create backups on it. +Configuration parameters are the same as for `s3` disk type. + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. @@ -672,7 +681,40 @@ There is a tool `clickhouse-static-files-uploader`, which prepares a data direct This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). 
-Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`. +:::tip +A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) +::: + +In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. + +```sql +# highlight-next-line +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` A ready test case. You need to add this configuration to config: From 09e630e02be9ccd19681b34f33e24cea849ca9fd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 15 Feb 2024 19:00:08 +0100 Subject: [PATCH 028/985] Update storing-data.md --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 0f818b813bf..60e33fe2849 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -10,7 +10,7 @@ Data, processed in ClickHouse, is usually stored in the local file system — on 2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) 3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` famility or `Log` family tables. 
+Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. ## Configuring external storage {#configuring-external-storage} From 7bf42fd86e9599357282f947312c98d2bec1047f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Feb 2024 11:16:14 +0100 Subject: [PATCH 029/985] Fix upgrade check --- src/Core/SettingsChangesHistory.h | 3 ++- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index c453dd837eb..b6d07d7057a 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,7 +93,8 @@ static std::map sett {"async_insert_busy_timeout_decrease_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout decreases"}, {"split_parts_ranges_into_intersecting_and_non_intersecting_final", true, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}, - {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}}}, + {"azure_max_single_part_copy_size", 256*1024*1024, 256*1024*1024, "The maximum size of object to copy using single part copy to Azure blob storage."}, + {"azure_max_inflight_parts_for_one_file", 20, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited."}}}, {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index d700090303a..74a8949b235 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -98,7 +98,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); - LOG_DEBUG(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); } void WriteBufferFromAzureBlobStorage::nextImpl() From a11e67d4aae4433dd0f3d8ee46ba40e1cd73fdd5 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 16 Feb 2024 16:41:58 +0100 Subject: [PATCH 030/985] Make max_insert_delayed_streams_for_parallel_write actually work --- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 1fb2393948a..f5494e56049 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -323,6 +323,9 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if (!temp_part.part) continue; + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) + support_parallel_write = true; + BlockIDsType block_id; if constexpr (async_insert) From 458793cc50b92361848c91803d07105a91acea85 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 16 Feb 2024 17:13:37 +0100 Subject: [PATCH 031/985] Review fix --- src/Storages/MergeTree/MergeTreeSink.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 36816904a81..ebc49e22d03 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -109,9 +109,14 @@ void MergeTreeSink::consume(Chunk chunk) } } - size_t max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE; - if (!support_parallel_write || settings.max_insert_delayed_streams_for_parallel_write.changed) + size_t max_insert_delayed_streams_for_parallel_write; + + if (settings.max_insert_delayed_streams_for_parallel_write.changed) max_insert_delayed_streams_for_parallel_write = settings.max_insert_delayed_streams_for_parallel_write; + else if (support_parallel_write) + max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE; + else + max_insert_delayed_streams_for_parallel_write = 0; /// In case of too much columns/parts in block, flush explicitly. 
streams += temp_part.streams.size(); From f7b524465c60b15c85f579ca22c48d4c165bf6f2 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 16 Feb 2024 17:14:36 +0100 Subject: [PATCH 032/985] Followup --- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index f5494e56049..3cbdcf5106e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -368,9 +368,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) profile_events_scope.reset(); UInt64 elapsed_ns = watch.elapsed(); - size_t max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE; - if (!support_parallel_write || settings.max_insert_delayed_streams_for_parallel_write.changed) + size_t max_insert_delayed_streams_for_parallel_write; + if (settings.max_insert_delayed_streams_for_parallel_write.changed) max_insert_delayed_streams_for_parallel_write = settings.max_insert_delayed_streams_for_parallel_write; + else if (support_parallel_write) + max_insert_delayed_streams_for_parallel_write = DEFAULT_DELAYED_STREAMS_FOR_PARALLEL_WRITE; + else + max_insert_delayed_streams_for_parallel_write = 0; /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); From 1549725eddb6db299ba0297de21a51411607d2a3 Mon Sep 17 00:00:00 2001 From: unashi Date: Sun, 18 Feb 2024 19:26:12 +0800 Subject: [PATCH 033/985] [feature]: allow to attach parts from a different disk --- src/Storages/MergeTree/MergeTreeData.cpp | 13 ++ src/Storages/MergeTree/MergeTreeData.h | 9 + .../MergeTree/MergeTreeDataPartCloner.cpp | 70 ++++++- src/Storages/StorageMergeTree.cpp | 45 +++-- src/Storages/StorageReplicatedMergeTree.cpp | 42 ++-- .../__init__.py | 0 .../configs/remote_servers.xml | 17 ++ .../test_attach_partition_using_copy/test.py | 183 ++++++++++++++++++ 8 files changed, 353 insertions(+), 26 deletions(-) create mode 100644 tests/integration/test_attach_partition_using_copy/__init__.py create mode 100644 tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml create mode 100644 tests/integration/test_attach_partition_using_copy/test.py diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3ca746a7197..56710b157de 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7085,6 +7085,19 @@ std::pair MergeTreeData::cloneAn this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); } +std::pair MergeTreeData::cloneAndLoadDataPartOnOtherDisk( + const MergeTreeData::DataPartPtr & src_part, + const String & tmp_part_prefix, + const MergeTreePartInfo & dst_part_info, + const StorageMetadataPtr & metadata_snapshot, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings) +{ + return MergeTreeDataPartCloner::clone( + this, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, require_part_metadata, params, read_settings, write_settings); +} + std::pair MergeTreeData::cloneAndLoadPartOnSameDiskWithDifferentPartitionKey( const MergeTreeData::DataPartPtr & src_part, const MergeTreePartition & new_partition, diff --git a/src/Storages/MergeTree/MergeTreeData.h 
b/src/Storages/MergeTree/MergeTreeData.h index dfdc22baa8f..a24362f68fc 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -866,6 +866,15 @@ public: ContextPtr local_context, Int64 min_block, Int64 max_block); + + std::pair cloneAndLoadDataPartOnOtherDisk( + const MergeTreeData::DataPartPtr & src_part, + const String & tmp_part_prefix, + const MergeTreePartInfo & dst_part_info, + const StorageMetadataPtr & metadata_snapshot, + const IDataPartStorage::ClonePartParams & params, + const ReadSettings & read_settings, + const WriteSettings & write_settings); static std::pair createPartitionAndMinMaxIndexFromSourcePart( const MergeTreeData::DataPartPtr & src_part, diff --git a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp index 04019d2c665..69b7abacc93 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCloner.cpp @@ -142,6 +142,30 @@ std::shared_ptr hardlinkAllFiles( params); } +std::shared_ptr cloneAllFiles( + MergeTreeData * merge_tree_data, + const DB::ReadSettings & read_settings, + const DB::WriteSettings & write_settings, + const DataPartStoragePtr & storage, + const String & path) +{ + for (const DiskPtr & disk : merge_tree_data->getStoragePolicy()->getDisks()) + { + try{ + return storage->clonePart( + merge_tree_data->getRelativeDataPath(), + path, + disk, + read_settings, + write_settings,{},{}); + }catch(...) { + LOG_TRACE(&Poco::Logger::get("MergeTreeDataPartCloner"), "Clone part on disk {} fail", disk->getName()); + } + } + LOG_FATAL(&Poco::Logger::get("MergeTreeDataPartCloner"), "Clone part on disks all fail"); + throw; +} + std::pair cloneSourcePart( MergeTreeData * merge_tree_data, const MergeTreeData::DataPartPtr & src_part, @@ -165,8 +189,18 @@ std::pair cloneSourcePart( auto src_part_storage = flushPartStorageToDiskIfInMemory( merge_tree_data, src_part, metadata_snapshot, tmp_part_prefix, tmp_dst_part_name, src_flushed_tmp_dir_lock, src_flushed_tmp_part); - - auto dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params); + std::shared_ptr dst_part_storage {}; + if (params.copy_instead_of_hardlink) { + dst_part_storage = cloneAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name); + } else { + try{ + dst_part_storage = hardlinkAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name, params); + } catch(...){ + // Hard link fail. Try copy. + LOG_WARNING(&Poco::Logger::get("MergeTreeDataPartCloner"), "Hard link fail, try tp copy directly. 
to:{}, path:{}", merge_tree_data->getRelativeDataPath(),tmp_dst_part_name); + dst_part_storage = cloneAllFiles(merge_tree_data, read_settings, write_settings, src_part_storage, tmp_dst_part_name); + } + } if (params.metadata_version_to_write.has_value()) { @@ -275,6 +309,25 @@ std::pair cloneAndHand return std::make_pair(destination_part, std::move(temporary_directory_lock)); } + +std::pair cloneInsteadOfHardlinksAndProjections( + MergeTreeData * merge_tree_data, + const DataPartPtr & src_part, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreePartInfo & dst_part_info, + const String & tmp_part_prefix, + const ReadSettings & read_settings, + const WriteSettings & write_settings, + const IDataPartStorage::ClonePartParams & params) +{ + chassert(!merge_tree_data->isStaticStorage()); + + auto [destination_part, temporary_directory_lock] = cloneSourcePart( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + + return std::make_pair(destination_part, std::move(temporary_directory_lock)); +} + } std::pair MergeTreeDataPartCloner::clone( @@ -288,10 +341,19 @@ std::pair MergeTreeDat const ReadSettings & read_settings, const WriteSettings & write_settings) { - auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + if (params.copy_instead_of_hardlink) + { + auto [destination_part, temporary_directory_lock] = cloneInsteadOfHardlinksAndProjections( merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); + } + else + { + auto [destination_part, temporary_directory_lock] = cloneAndHandleHardlinksAndProjections( + merge_tree_data, src_part, metadata_snapshot, dst_part_info, tmp_part_prefix, read_settings, write_settings, params); + return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); - return std::make_pair(finalizePart(destination_part, params, require_part_metadata), std::move(temporary_directory_lock)); + } } std::pair MergeTreeDataPartCloner::cloneWithDistinctPartitionExpression( diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 0f75c726bce..0f95fef9c6e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2118,17 +2118,40 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con else { MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - my_metadata_snapshot, - clone_params, - local_context->getReadSettings(), - local_context->getWriteSettings()); - dst_parts.emplace_back(std::move(dst_part)); - dst_parts_locks.emplace_back(std::move(part_lock)); + LOG_TRACE(log, "Partition exps are the same:part id: {}; number of disks:{}",dst_part_info.partition_id, this->getStoragePolicy()->getDisks().size()); + bool on_same_disk = false; + for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) + { + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) + on_same_disk = true; + } + if (on_same_disk) + { + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + 
local_context->getReadSettings(), + local_context->getWriteSettings()); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + clone_params.copy_instead_of_hardlink = true; + auto [dst_part, part_lock] = cloneAndLoadDataPartOnOtherDisk( + src_part, + TMP_PREFIX, + dst_part_info, + my_metadata_snapshot, + clone_params, + local_context->getReadSettings(), + local_context->getWriteSettings()); + dst_parts.emplace_back(std::move(dst_part)); + dst_parts_locks.emplace_back(std::move(part_lock)); + } } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6bd57cc4d6d..ba0d27fe612 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8015,17 +8015,37 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level); - auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( - src_part, - TMP_PREFIX, - dst_part_info, - metadata_snapshot, - clone_params, - query_context->getReadSettings(), - query_context->getWriteSettings()); - - dst_parts.emplace_back(dst_part); - dst_parts_locks.emplace_back(std::move(part_lock)); + bool on_same_disk = false; + for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) + if (disk->getName() == src_part->getDataPartStorage().getDiskName()) + on_same_disk = true; + if (on_same_disk) + { + auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } + else + { + clone_params.copy_instead_of_hardlink = true; + auto [dst_part, part_lock] = cloneAndLoadDataPartOnOtherDisk( + src_part, + TMP_PREFIX, + dst_part_info, + metadata_snapshot, + clone_params, + query_context->getReadSettings(), + query_context->getWriteSettings()); + dst_parts.emplace_back(dst_part); + dst_parts_locks.emplace_back(std::move(part_lock)); + } } src_parts.emplace_back(src_part); diff --git a/tests/integration/test_attach_partition_using_copy/__init__.py b/tests/integration/test_attach_partition_using_copy/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml new file mode 100644 index 00000000000..b40730e9f7d --- /dev/null +++ b/tests/integration/test_attach_partition_using_copy/configs/remote_servers.xml @@ -0,0 +1,17 @@ + + + + + true + + replica1 + 9000 + + + replica2 + 9000 + + + + + diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py new file mode 100644 index 00000000000..effb5708cf3 --- /dev/null +++ b/tests/integration/test_attach_partition_using_copy/test.py @@ -0,0 +1,183 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) + +replica1 = cluster.add_instance( + "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) +replica2 = cluster.add_instance( + "replica2", with_zookeeper=True, main_configs=["configs/remote_servers.xml"] +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + 
cluster.start() + yield cluster + except Exception as ex: + print(ex) + finally: + cluster.shutdown() + + +def cleanup(nodes): + for node in nodes: + node.query("DROP TABLE IF EXISTS source SYNC") + node.query("DROP TABLE IF EXISTS destination SYNC") + + +def create_source_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + node.query_with_retry( + """ + ATTACH TABLE {table_name} UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' + ( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) + ) + ENGINE = {engine} + ORDER BY (postcode1, postcode2, addr1, addr2) + SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') + """.format( + table_name=table_name, + engine=engine + ) + ) + + + +def create_destination_table(node, table_name, replicated): + replica = node.name + engine = ( + f"ReplicatedMergeTree('/clickhouse/tables/1/{table_name}', '{replica}')" + if replicated + else "MergeTree()" + ) + node.query_with_retry( + """ + CREATE TABLE {table_name} + ( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) + ) + ENGINE = {engine} + ORDER BY (postcode1, postcode2, addr1, addr2) + """.format( + table_name=table_name, + engine=engine + ) + ) + +def test_both_mergtree(start_cluster): + create_source_table(replica1, "source", False) + create_destination_table(replica1, "destination", False) + + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", + "SCARBOROUGH" + ) + + cleanup([replica1]) + +def test_all_replicated(start_cluster): + create_source_table(replica1, "source", True) + create_destination_table(replica1, "destination", True) + create_destination_table(replica2, "destination", True) + + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), + ) + 
assert_eq_with_retry( + replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC"), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", + "SCARBOROUGH" + ) + + assert_eq_with_retry( + replica2, f"SELECT town from destination LIMIT 1", + "SCARBOROUGH" + ) + + cleanup([replica1, replica2]) + +def test_only_destination_replicated(start_cluster): + create_source_table(replica1, "source", False) + create_destination_table(replica1, "destination", True) + create_destination_table(replica2, "destination", True) + + replica1.query("SYSTEM SYNC REPLICA destination") + replica1.query( + f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" + ) + + assert_eq_with_retry( + replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), + ) + assert_eq_with_retry( + replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC"), + ) + + assert_eq_with_retry( + replica1, f"SELECT town from destination LIMIT 1", + "SCARBOROUGH" + ) + + assert_eq_with_retry( + replica2, f"SELECT town from destination LIMIT 1", + "SCARBOROUGH" + ) + + cleanup([replica1, replica2]) From 8c11f59ba82bd9ae3a322f7a9729c4a5a8644512 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:01:37 +0100 Subject: [PATCH 034/985] Fix bad link, update disk web description --- docs/en/operations/storing-data.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 60e33fe2849..4b0345a3206 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -628,14 +628,9 @@ Cache profile events: ### Using static Web storage (read-only) {#web-storage} -Web storage can be used for read-only purposes. An example use is for hosting sample -data, or for migrating data. - -:::tip -Storage can also be configured temporarily within a query, if a web dataset is not expected -to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the -configuration file. -::: +This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). 
+Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data. +There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. In this sample configuration: - the disk is of type `web` @@ -677,9 +672,11 @@ In this sample configuration:
``` -There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. - -This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). +:::tip +Storage can also be configured temporarily within a query, if a web dataset is not expected +to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the +configuration file. +::: :::tip A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) From 601b1dfaa14323db28f169b6b193d59ec75e8bfc Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:21:52 +0100 Subject: [PATCH 035/985] Fix bad link --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 4b0345a3206..4f676904375 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -10,7 +10,7 @@ Data, processed in ClickHouse, is usually stored in the local file system — on 2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) 3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. +Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. 
They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. ## Configuring external storage {#configuring-external-storage} From 80fe3f78d99caeaed733548ca65b6bd466730d51 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 11:12:09 +0800 Subject: [PATCH 036/985] [fix] black the python script --- .../test_attach_partition_using_copy/test.py | 92 ++++++++++--------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/tests/integration/test_attach_partition_using_copy/test.py b/tests/integration/test_attach_partition_using_copy/test.py index effb5708cf3..df5378742ae 100644 --- a/tests/integration/test_attach_partition_using_copy/test.py +++ b/tests/integration/test_attach_partition_using_copy/test.py @@ -59,13 +59,11 @@ def create_source_table(node, table_name, replicated): ORDER BY (postcode1, postcode2, addr1, addr2) SETTINGS disk = disk(type = web, endpoint = 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/') """.format( - table_name=table_name, - engine=engine + table_name=table_name, engine=engine ) ) - def create_destination_table(node, table_name, replicated): replica = node.name engine = ( @@ -95,89 +93,95 @@ def create_destination_table(node, table_name, replicated): ENGINE = {engine} ORDER BY (postcode1, postcode2, addr1, addr2) """.format( - table_name=table_name, - engine=engine + table_name=table_name, engine=engine ) ) + def test_both_mergtree(start_cluster): create_source_table(replica1, "source", False) create_destination_table(replica1, "destination", False) - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" - ) - + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + assert_eq_with_retry( - replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), ) - + assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", - "SCARBOROUGH" + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" ) cleanup([replica1]) + def test_all_replicated(start_cluster): create_source_table(replica1, "source", True) create_destination_table(replica1, "destination", True) create_destination_table(replica2, "destination", True) replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), + ) + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) 
AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" + ), ) assert_eq_with_retry( - replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), - ) - assert_eq_with_retry( - replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC"), + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" ) assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", - "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", - "SCARBOROUGH" + replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" ) cleanup([replica1, replica2]) + def test_only_destination_replicated(start_cluster): create_source_table(replica1, "source", False) create_destination_table(replica1, "destination", True) create_destination_table(replica2, "destination", True) replica1.query("SYSTEM SYNC REPLICA destination") - replica1.query( - f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source" + replica1.query(f"ALTER TABLE destination ATTACH PARTITION tuple() FROM source") + + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", + replica1.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC" + ), + ) + assert_eq_with_retry( + replica1, + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", + replica2.query( + f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC" + ), ) assert_eq_with_retry( - replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC", - replica1.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC"), - ) - assert_eq_with_retry( - replica1, f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM source GROUP BY year ORDER BY year ASC", - replica2.query(f"SELECT toYear(date) AS year,round(avg(price)) AS price,bar(price, 0, 1000000, 80) FROM destination GROUP BY year ORDER BY year ASC"), + replica1, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" ) assert_eq_with_retry( - replica1, f"SELECT town from destination LIMIT 1", - "SCARBOROUGH" - ) - - assert_eq_with_retry( - replica2, f"SELECT town from destination LIMIT 1", - "SCARBOROUGH" + replica2, f"SELECT town from destination LIMIT 1", "SCARBOROUGH" ) cleanup([replica1, replica2]) From 8de4a9dbfd32b7e82764a5c8efff3916b5c7ccda Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 11:42:40 +0800 Subject: [PATCH 037/985] [fix] delete trailing whitespaces --- src/Storages/MergeTree/MergeTreeData.h 
| 2 +- src/Storages/StorageMergeTree.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 5f387385d38..081087acbaa 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -853,7 +853,7 @@ public: const IDataPartStorage::ClonePartParams & params, const ReadSettings & read_settings, const WriteSettings & write_settings); - + std::pair cloneAndLoadDataPartOnOtherDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index a2713775e65..47684925182 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2081,7 +2081,7 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con /// This will generate unique name in scope of current server process. Int64 temp_index = insert_increment.get(); MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); - + IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; LOG_TRACE(log, "Partition exps are the same:part id: {}; number of disks:{}",dst_part_info.partition_id, this->getStoragePolicy()->getDisks().size()); bool on_same_disk = false; From 6437877a712bfaf4a36c180b332a0d6a37981af1 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 20:31:59 +0800 Subject: [PATCH 038/985] [fix] add changelog; change some feature logic --- CHANGELOG.md | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 10 +++++++--- src/Storages/StorageMergeTree.cpp | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 3 +-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3e5dd709ab..fd4ff90f841 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ * Added `FROM ` modifier for `SYSTEM SYNC REPLICA LIGHTWEIGHT` query. With the `FROM` modifier ensures we wait for fetches and drop-ranges only for the specified source replicas, as well as any replica not in zookeeper or with an empty source_replica. [#58393](https://github.com/ClickHouse/ClickHouse/pull/58393) ([Jayme Bird](https://github.com/jaymebrd)). * Added setting `update_insert_deduplication_token_in_dependent_materialized_views`. This setting allows to update insert deduplication token with table identifier during insert in dependent materialized views. Closes [#59165](https://github.com/ClickHouse/ClickHouse/issues/59165). [#59238](https://github.com/ClickHouse/ClickHouse/pull/59238) ([Maksim Kita](https://github.com/kitaisreal)). * Added statement `SYSTEM RELOAD ASYNCHRONOUS METRICS` which updates the asynchronous metrics. Mostly useful for testing and development. [#53710](https://github.com/ClickHouse/ClickHouse/pull/53710) ([Robert Schulze](https://github.com/rschu1ze)). +* Attach parts from a different disk `ALTER TABLE destination ATTACH PARTITION tuple() FROM source` where source is an [instant table](https://github.com/ClickHouse/web-tables-demo). [#60112](https://github.com/ClickHouse/ClickHouse/pull/60112)([Unalian](https://github.com/Unalian)). #### Performance Improvement * Coordination for parallel replicas is rewritten for better parallelism and cache locality. It has been tested for linear scalability on hundreds of replicas. It also got support for reading in order. 
[#57968](https://github.com/ClickHouse/ClickHouse/pull/57968) ([Nikita Taranov](https://github.com/nickitat)). diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 64787d3509b..be1346e0ea2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,6 +8,7 @@ #include #include #include +#include "Common/logger_useful.h" #include #include #include @@ -7170,7 +7171,9 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - throw; + { + LOG_FATAL(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail"); + } } @@ -7301,8 +7304,9 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - throw; - + { + LOG_FATAL( &Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail."); + } if (params.metadata_version_to_write.has_value()) { chassert(!params.keep_metadata_version); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 47684925182..0748ac2dbdf 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2104,7 +2104,6 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con } else { - clone_params.copy_instead_of_hardlink = true; auto [dst_part, part_lock] = cloneAndLoadDataPartOnOtherDisk( src_part, TMP_PREFIX, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index df261053360..2460d2704c4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7933,7 +7933,7 @@ void StorageReplicatedMergeTree::replacePartitionFrom( for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) if (disk->getName() == src_part->getDataPartStorage().getDiskName()) on_same_disk = true; - if (on_same_disk) + if (on_same_disk && !clone_params.copy_instead_of_hardlink) { auto [dst_part, part_lock] = cloneAndLoadDataPartOnSameDisk( src_part, @@ -7948,7 +7948,6 @@ void StorageReplicatedMergeTree::replacePartitionFrom( } else { - clone_params.copy_instead_of_hardlink = true; auto [dst_part, part_lock] = cloneAndLoadDataPartOnOtherDisk( src_part, TMP_PREFIX, From fc3ebe007b3b5dc905ecbd63ed402547a1cde3a5 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 20:54:32 +0800 Subject: [PATCH 039/985] [fix] rm whitespaces --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index be1346e0ea2..18bb0966bfc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7171,9 +7171,7 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - { LOG_FATAL(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail"); - } } @@ -7304,9 +7302,7 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - { LOG_FATAL( &Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail."); - } if (params.metadata_version_to_write.has_value()) { chassert(!params.keep_metadata_version); From f829a97d9130de5609e07e237b9486847422bc8c Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 21:08:24 +0800 Subject: [PATCH 040/985] [fix] rm whitespaces --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 18bb0966bfc..849ceb1b66d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp 
+++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7302,7 +7302,7 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - LOG_FATAL( &Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail."); + LOG_FATAL(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail."); if (params.metadata_version_to_write.has_value()) { chassert(!params.keep_metadata_version); From 28282eee91add78e5b18202bd38566d1d3797083 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 20 Feb 2024 21:37:09 +0800 Subject: [PATCH 041/985] [fix] Add description in partition.md --- docs/en/sql-reference/statements/alter/partition.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index 114b8d5ffe3..277e174bb05 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -116,6 +116,8 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same indices and projections. - Both tables must have the same storage policy. +If both tables have the same storage policy, use hardlink to attach partition. Otherwise, use copying the data to attach partition. + ## REPLACE PARTITION ``` sql From 1731a5a8afba5a48ce01cea20e0cdc1f91316841 Mon Sep 17 00:00:00 2001 From: unashi Date: Wed, 21 Feb 2024 10:55:32 +0800 Subject: [PATCH 042/985] [improve]change the integration test test_multiple_disks::test_move_across_policies_not_work to test_move_across_policies_work_for_attach_not_work_for_move --- tests/integration/test_multiple_disks/test.py | 36 +++++++------------ 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fdd81284b2a..9584ace7f45 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -5,6 +5,7 @@ import string import threading import time from multiprocessing.dummy import Pool +from helpers.test_tools import assert_eq_with_retry import pytest from helpers.client import QueryRuntimeException @@ -1745,9 +1746,9 @@ def test_move_while_merge(start_cluster): node1.query(f"DROP TABLE IF EXISTS {name} SYNC") -def test_move_across_policies_does_not_work(start_cluster): +def test_move_across_policies_work_for_attach_not_work_for_move(start_cluster): try: - name = "test_move_across_policies_does_not_work" + name = "test_move_across_policies_work_for_attach_not_work_for_move" node1.query( """ @@ -1783,25 +1784,18 @@ def test_move_across_policies_does_not_work(start_cluster): except QueryRuntimeException: """All parts of partition 'all' are already on disk 'jbod2'.""" - with pytest.raises( - QueryRuntimeException, - match=".*because disk does not belong to storage policy.*", - ): - node1.query( - """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( - name=name - ) + node1.query( + """ALTER TABLE {name}2 ATTACH PARTITION tuple() FROM {name}""".format( + name=name ) - - with pytest.raises( - QueryRuntimeException, - match=".*because disk does not belong to storage policy.*", - ): + ) + assert_eq_with_retry( + node1, + """SELECT * FROM {name}2""".format(name=name), node1.query( - """ALTER TABLE {name}2 REPLACE PARTITION tuple() FROM {name}""".format( - name=name - ) - ) + """SELECT * FROM {name}""".format(name=name), + ), + ) with pytest.raises( QueryRuntimeException, @@ -1813,10 +1807,6 @@ def 
test_move_across_policies_does_not_work(start_cluster): ) ) - assert node1.query( - """SELECT * FROM {name}""".format(name=name) - ).splitlines() == ["1"] - finally: node1.query(f"DROP TABLE IF EXISTS {name} SYNC") node1.query(f"DROP TABLE IF EXISTS {name}2 SYNC") From e98d09c93e6c54a2cc4eadab8614539c0a5eb0f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2024 21:41:04 +0100 Subject: [PATCH 043/985] Do not load useless columns from the index in memory --- contrib/rapidjson | 2 +- src/Processors/QueryPlan/PartsSplitter.cpp | 9 ++++--- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 21 +++++++++++++++ .../MergeTree/MergeTreeDataSelectExecutor.cpp | 26 +++++++++++++++---- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 5 files changed, 49 insertions(+), 11 deletions(-) diff --git a/contrib/rapidjson b/contrib/rapidjson index c4ef90ccdbc..a9bc56c9165 160000 --- a/contrib/rapidjson +++ b/contrib/rapidjson @@ -1 +1 @@ -Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa +Subproject commit a9bc56c9165f1dbbbcada64221bd3a59042c5b95 diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 0fc6ddd6408..fcb1d8dd92c 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -32,9 +32,9 @@ std::string toString(const Values & value) int compareValues(const Values & lhs, const Values & rhs) { - chassert(lhs.size() == rhs.size()); + size_t size = std::min(lhs.size(), rhs.size()); - for (size_t i = 0; i < lhs.size(); ++i) + for (size_t i = 0; i < size; ++i) { if (applyVisitor(FieldVisitorAccurateLess(), lhs[i], rhs[i])) return -1; @@ -55,8 +55,9 @@ public: Values getValue(size_t part_idx, size_t mark) const { const auto & index = parts[part_idx].data_part->getIndex(); - Values values(index.size()); - for (size_t i = 0; i < values.size(); ++i) + size_t size = index.size(); + Values values(size); + for (size_t i = 0; i < size; ++i) { index[i]->get(mark, values[i]); if (values[i].isNull()) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 11ede661f78..629f3688874 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -869,6 +869,27 @@ void IMergeTreeDataPart::loadIndex() const for (size_t j = 0; j < key_size; ++j) key_serializations[j]->deserializeBinary(*loaded_index[j], *index_file, {}); + /// Cut useless suffix columns, if necessary. 
+ Float64 ratio_to_drop_suffix_columns = storage.getSettings()->primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns; + if (key_size > 1 && ratio_to_drop_suffix_columns > 0 && ratio_to_drop_suffix_columns < 1) + { + chassert(marks_count > 0); + for (size_t j = 0; j < key_size - 1; ++j) + { + size_t num_changes = 0; + for (size_t i = 1; i < marks_count; ++i) + if (0 != loaded_index[j]->compareAt(i, i - 1, *loaded_index[j], 0)) + ++num_changes; + + if (static_cast(num_changes) / marks_count >= ratio_to_drop_suffix_columns) + { + key_size = j + 1; + loaded_index.resize(key_size); + break; + } + } + } + for (size_t i = 0; i < key_size; ++i) { loaded_index[i]->shrinkToFit(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 1ba28713680..175419f20e0 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1110,7 +1110,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( DataTypes key_types; for (size_t i : key_indices) { - index_columns->emplace_back(ColumnWithTypeAndName{index[i], primary_key.data_types[i], primary_key.column_names[i]}); + if (i < index.size()) + index_columns->emplace_back(index[i], primary_key.data_types[i], primary_key.column_names[i]); + else + index_columns->emplace_back(); /// The column of the primary key was not loaded in memory - we'll skip it. + key_types.emplace_back(primary_key.data_types[i]); } @@ -1119,7 +1123,6 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::function create_field_ref; if (key_condition.hasMonotonicFunctionsChain()) { - create_field_ref = [index_columns](size_t row, size_t column, FieldRef & field) { field = {index_columns.get(), row, column}; @@ -1159,7 +1162,11 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( { for (size_t i = 0; i < used_key_size; ++i) { - create_field_ref(range.begin, i, index_left[i]); + if ((*index_columns)[i].column) + create_field_ref(range.begin, i, index_left[i]); + else + index_left[i] = NEGATIVE_INFINITY; + index_right[i] = POSITIVE_INFINITY; } } @@ -1170,8 +1177,17 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( for (size_t i = 0; i < used_key_size; ++i) { - create_field_ref(range.begin, i, index_left[i]); - create_field_ref(range.end, i, index_right[i]); + if ((*index_columns)[i].column) + { + create_field_ref(range.begin, i, index_left[i]); + create_field_ref(range.end, i, index_right[i]); + } + else + { + /// If the PK column was not loaded in memory - exclude it from the analysis. + index_left[i] = NEGATIVE_INFINITY; + index_right[i] = POSITIVE_INFINITY; + } } } key_condition_maybe_true = key_condition.mayBeTrueInRange(used_key_size, index_left.data(), index_right.data(), key_types); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index b64632b6139..1cff44142bc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -202,7 +202,7 @@ struct Settings; M(UInt64, marks_compress_block_size, 65536, "Mark compress block size, the actual size of the block to compress.", 0) \ M(UInt64, primary_key_compress_block_size, 65536, "Primary compress block size, the actual size of the block to compress.", 0) \ M(Bool, primary_key_lazy_load, true, "Load primary key in memory on first use instead of on table initialization. 
This can save memory in the presence of a large number of tables.", 0) \ - \ + M(Float, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns, 0.9f, "If the value of a column of the primary key in data part changes at least in this ratio of times, skip loading next columns in memory. This allows to save memory usage by not loading useless columns of the primary key.", 0) \ /** Projection settings. */ \ M(UInt64, max_projections, 25, "The maximum number of merge tree projections.", 0) \ From 31de27b149ab2922647ff7d9141871330cc9d743 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2024 21:42:19 +0100 Subject: [PATCH 044/985] Do not load useless columns from the index in memory --- contrib/rapidjson | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/rapidjson b/contrib/rapidjson index a9bc56c9165..c4ef90ccdbc 160000 --- a/contrib/rapidjson +++ b/contrib/rapidjson @@ -1 +1 @@ -Subproject commit a9bc56c9165f1dbbbcada64221bd3a59042c5b95 +Subproject commit c4ef90ccdbc21d5d5a628d08316bfd301e32d6fa From d0d84a840151675ee3c7a108709e4b0b486af577 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Feb 2024 23:03:01 +0100 Subject: [PATCH 045/985] Add a test --- .../02998_primary_key_skip_columns.reference | 18 ++++++++++ .../02998_primary_key_skip_columns.sql | 33 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/02998_primary_key_skip_columns.reference create mode 100644 tests/queries/0_stateless/02998_primary_key_skip_columns.sql diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference new file mode 100644 index 00000000000..9df0a2c097c --- /dev/null +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference @@ -0,0 +1,18 @@ +100000 +14954 +798 +15908 +108 +120 +2334 +19 +Key size: 2400000 +100000 +14954 +798 +15907 +108 +120 +2334 +19 +Key size: 800008 diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql new file mode 100644 index 00000000000..801fa35fb52 --- /dev/null +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a UInt64, b UInt64, c UInt64) ENGINE = MergeTree ORDER BY (a, b, c) SETTINGS index_granularity = 1, primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 1; +INSERT INTO test SELECT sipHash64(number, 1), sipHash64(number, 2), sipHash64(number, 3) FROM numbers(100000); + +SELECT count() FROM test; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; + +SELECT 'Key size: ', 
round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; + +ALTER TABLE test MODIFY SETTING primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns = 0.9; + +DETACH TABLE test; +ATTACH TABLE test; + +SELECT count() FROM test; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137; +SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; +SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; + +SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), 5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; + +DROP TABLE test; From 395ad35c93a185291a16449b1ac4d1dcecb1a127 Mon Sep 17 00:00:00 2001 From: Shanfeng Pang Date: Thu, 22 Feb 2024 10:41:16 +0800 Subject: [PATCH 046/985] fix LRUResource Cache bug --- src/Common/LRUResourceCache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/LRUResourceCache.h b/src/Common/LRUResourceCache.h index 4ccaa272346..60b4053bff5 100644 --- a/src/Common/LRUResourceCache.h +++ b/src/Common/LRUResourceCache.h @@ -221,7 +221,7 @@ private: { std::lock_guard lock(mutex); auto it = cells.find(key); - if (it != cells.end() && !it->second.expired) + if (it != cells.end()) { if (!it->second.expired) { From 999cf88ab79cf71bc82e7be3140496697a661416 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Feb 2024 09:54:39 +0100 Subject: [PATCH 047/985] Improve test --- tests/queries/0_stateless/02998_primary_key_skip_columns.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql index 801fa35fb52..b2dadcc5e7c 100644 --- a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql @@ -28,6 +28,6 @@ SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 80403209398191531 SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; -SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), 5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; +SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; DROP TABLE test; From 9b10aebecc690e36ec3591ba7115991b00920289 Mon Sep 17 00:00:00 2001 From: Shanfeng Pang Date: Thu, 22 Feb 2024 17:24:59 +0800 Subject: [PATCH 048/985] add unit-test for bug fix --- src/Common/tests/gtest_lru_resource_cache.cpp | 27 
+++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/Common/tests/gtest_lru_resource_cache.cpp b/src/Common/tests/gtest_lru_resource_cache.cpp index bc037824ff8..94490d1e86d 100644 --- a/src/Common/tests/gtest_lru_resource_cache.cpp +++ b/src/Common/tests/gtest_lru_resource_cache.cpp @@ -45,6 +45,33 @@ struct MyWeight size_t operator()(const int & x) const { return static_cast(x); } }; +TEST(LRUResourceCache, remove2) +{ + using MyCache = DB::LRUResourceCache; + auto mcache = MyCache(10, 10); + for (int i = 1; i < 5; ++i) + { + auto load_int = [&] { return std::make_shared(i); }; + mcache.getOrSet(i, load_int); + } + + auto n = mcache.size(); + ASSERT_EQ(n, 4); + auto w = mcache.weight(); + ASSERT_EQ(w, 10); + auto holder4 = mcache.get(4); + ASSERT_TRUE(holder4 != nullptr); + mcache.tryRemove(4); + auto holder_reget_4 = mcache.get(4); + ASSERT_TRUE(holder_reget_4 == nullptr); + mcache.getOrSet(4, [&]() { return std::make_shared(4); }); + holder4.reset(); + auto holder1 = mcache.getOrSet(1, [&]() { return std::make_shared(1); }); + ASSERT_TRUE(holder1 != nullptr); + auto holder7 = mcache.getOrSet(7, [&] { return std::make_shared(7); }); + ASSERT_TRUE(holder7 != nullptr); +} + TEST(LRUResourceCache, evictOnWweight) { using MyCache = DB::LRUResourceCache; From 3ef159853c78e438e6088d60a64bcff2bbb77b17 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 22 Feb 2024 14:34:18 +0000 Subject: [PATCH 049/985] Do something to the test --- .../0_stateless/02998_primary_key_skip_columns.reference | 2 +- tests/queries/0_stateless/02998_primary_key_skip_columns.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference index 9df0a2c097c..ec44acbd16b 100644 --- a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference @@ -15,4 +15,4 @@ Key size: 2400000 120 2334 19 -Key size: 800008 +Key size ok: 1 1 diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql index b2dadcc5e7c..27672d7854e 100644 --- a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql @@ -28,6 +28,6 @@ SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 80403209398191531 SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; -SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; +SELECT 'Key size ok: ', (sum(primary_key_bytes_in_memory) as s) >= 800000, s < 1200000 FROM system.parts WHERE database = currentDatabase() AND table = 'test'; DROP TABLE test; From f1e95fb78bae190bb87e93704cf5f88c70cdccf4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 22 Feb 2024 15:38:44 +0100 Subject: [PATCH 050/985] Add a way to force read-through cache for merges --- src/Core/Settings.h | 1 + src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 10 +- .../Cached/CachedObjectStorage.cpp | 14 -- .../Cached/CachedObjectStorage.h | 2 - src/IO/ReadSettings.h | 2 +- 
src/Interpreters/Cache/FileSegment.cpp | 3 +- src/Interpreters/Context.cpp | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 2 +- .../integration/test_filesystem_cache/test.py | 79 ++++++++ .../users.d/cache_on_write_operations.xml | 7 + .../force_read_through_cache_on_merge.xml | 7 + ...system_cache_on_write_operations.reference | 170 ++++++++++++++++++ ...41_filesystem_cache_on_write_operations.sh | 81 +++++---- 13 files changed, 317 insertions(+), 62 deletions(-) create mode 100644 tests/integration/test_filesystem_cache/users.d/cache_on_write_operations.xml create mode 100644 tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 433195af9c3..db060bf712d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -769,6 +769,7 @@ class IColumn; M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \ M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \ M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, but don't put more entries into the cache. If you set this setting for heavy ad-hoc queries and leave it disabled for short real-time queries, this will allows to avoid cache threshing by too heavy queries and to improve the overall system efficiency.", 0) \ + M(Bool, force_read_through_cache_for_merges, false, "Force read-through cache for merges", 0) \ M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \ M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \ M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 0b3ecca3587..1da39c7011c 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -16,12 +16,10 @@ using namespace DB; namespace { -bool withCache(const ReadSettings & settings) -{ - return settings.remote_fs_cache && settings.enable_filesystem_cache - && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache - || !settings.avoid_readthrough_cache_outside_query_context); -} + bool withCache(const ReadSettings & settings) + { + return settings.remote_fs_cache && settings.enable_filesystem_cache; + } } namespace DB diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index 1444f4c9c76..e3ab772e3b5 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -43,10 +43,6 @@ ReadSettings CachedObjectStorage::patchSettings(const ReadSettings & read_settin { ReadSettings modified_settings{read_settings}; modified_settings.remote_fs_cache = cache; - - if (!canUseReadThroughCache(read_settings)) - modified_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; - return object_storage->patchSettings(modified_settings); } @@ -206,14 +202,4 @@ String 
CachedObjectStorage::getObjectsNamespace() const return object_storage->getObjectsNamespace(); } -bool CachedObjectStorage::canUseReadThroughCache(const ReadSettings & settings) -{ - if (!settings.avoid_readthrough_cache_outside_query_context) - return true; - - return CurrentThread::isInitialized() - && CurrentThread::get().getQueryContext() - && !CurrentThread::getQueryId().empty(); -} - } diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index 437baead7be..961c2709efc 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -119,8 +119,6 @@ public: const FileCacheSettings & getCacheSettings() const { return cache_settings; } - static bool canUseReadThroughCache(const ReadSettings & settings); - #if USE_AZURE_BLOB_STORAGE std::shared_ptr getAzureBlobStorageClient() override { diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c397689d6ad..2c79735317d 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -99,7 +99,7 @@ struct ReadSettings bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; /// Don't populate cache when the read is not part of query execution (e.g. background thread). - bool avoid_readthrough_cache_outside_query_context = true; + bool force_read_through_cache_merges = false; size_t filesystem_cache_segments_batch_size = 20; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 8bd89465917..7c0505889da 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include @@ -194,7 +195,7 @@ bool FileSegment::isDownloaded() const String FileSegment::getCallerId() { if (!CurrentThread::isInitialized() || CurrentThread::getQueryId().empty()) - return "None:" + toString(getThreadId()); + return fmt::format("None:{}:{}", getThreadName(), toString(getThreadId())); return std::string(CurrentThread::getQueryId()) + ":" + toString(getThreadId()); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 55a4df10206..36b362e36bb 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5079,6 +5079,7 @@ ReadSettings Context::getReadSettings() const res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; + res.force_read_through_cache_merges = settings.force_read_through_cache_for_merges; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index d0fbc316024..e375e8b0a9f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -151,7 +151,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const auto & context = storage.getContext(); ReadSettings read_settings = context->getReadSettings(); - 
read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = true; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = !read_settings.force_read_through_cache_merges; /// It does not make sense to use pthread_threadpool for background merges/mutations /// And also to preserve backward compatibility read_settings.local_fs_method = LocalFSReadMethod::pread; diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index eb5f896f7a9..c1ba6702dcf 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -19,6 +19,9 @@ def cluster(): main_configs=[ "config.d/storage_conf.xml", ], + user_configs=[ + "users.d/cache_on_write_operations.xml", + ], stay_alive=True, ) cluster.add_instance( @@ -35,6 +38,17 @@ def cluster(): ], stay_alive=True, ) + cluster.add_instance( + "node_force_read_through_cache_on_merge", + main_configs=[ + "config.d/storage_conf.xml", + ], + user_configs=[ + "users.d/force_read_through_cache_on_merge.xml", + "users.d/cache_on_write_operations.xml", + ], + stay_alive=True, + ) logging.info("Starting cluster...") cluster.start() @@ -323,3 +337,68 @@ def test_custom_cached_disk(cluster): "SELECT cache_path FROM system.disks WHERE name = 'custom_cached4'" ).strip() ) + + +def test_force_filesystem_cache_on_merges(cluster): + def test(node, forced_read_through_cache_on_merge): + node.query( + """ + DROP TABLE IF EXISTS test SYNC; + + CREATE TABLE test (key UInt32, value String) + Engine=MergeTree() + ORDER BY value + SETTINGS disk = disk( + type = cache, + path = 'force_cache_on_merges', + disk = 'hdd_blob', + max_file_segment_size = '1Ki', + cache_on_write_operations = 1, + boundary_alignment = '1Ki', + max_size = '10Gi', + max_elements = 10000000, + load_metadata_threads = 30); + + SYSTEM DROP FILESYSTEM CACHE; + INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; + INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; + """ + ) + assert int(node.query("SELECT count() FROM system.filesystem_cache")) > 0 + assert int(node.query("SELECT max(size) FROM system.filesystem_cache")) == 1024 + + write_count = int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + ) + ) + assert write_count > 100000 + assert "" == node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + ) + + node.query("SYSTEM DROP FILESYSTEM CACHE") + node.query("OPTIMIZE TABLE test FINAL") + + new_write_count = int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + ) + ) + assert new_write_count >= write_count + + if forced_read_through_cache_on_merge: + assert 100000 < int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + ) + ) + else: + assert "" == node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + ) + + node = cluster.instances["node_force_read_through_cache_on_merge"] + test(node, True) + node = cluster.instances["node"] + test(node, False) diff --git a/tests/integration/test_filesystem_cache/users.d/cache_on_write_operations.xml b/tests/integration/test_filesystem_cache/users.d/cache_on_write_operations.xml new file mode 100644 index 00000000000..5de169edc1e --- /dev/null +++ b/tests/integration/test_filesystem_cache/users.d/cache_on_write_operations.xml @@ -0,0 +1,7 @@ + + 
+ + 1 + + + diff --git a/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml b/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml new file mode 100644 index 00000000000..4d26a1a8bc7 --- /dev/null +++ b/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 157837983f7..c03b928684b 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -1,62 +1,232 @@ Using storage policy: s3_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 0 +SELECT count(), sum(size) FROM system.filesystem_cache 0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT count(), sum(size) size FROM system.filesystem_cache 8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM 
system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 +SYSTEM START MERGES test_02241 +SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' +85146 +SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' +OPTIMIZE TABLE test_02241 FINAL +SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' +251542 +SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' +SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache 41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 Using storage policy: local_cache +DROP TABLE IF EXISTS test_02241 +CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='local_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1 +SYSTEM STOP MERGES test_02241 +SYSTEM DROP FILESYSTEM CACHE +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 0 +SELECT count(), sum(size) FROM system.filesystem_cache 0 0 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE 
endsWith(local_path, 'data.bin') + FORMAT Vertical Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 745 size: 746 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 1100 +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 0 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT * FROM test_02241 FORMAT Null +SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0 2 +SELECT count(), sum(size) size FROM system.filesystem_cache 8 1100 +SYSTEM DROP FILESYSTEM CACHE +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200) +SELECT file_segment_range_begin, file_segment_range_end, size, state + FROM + ( + SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path + FROM + ( + SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path + FROM system.remote_data_paths + ) AS data_paths + INNER JOIN + system.filesystem_cache AS caches + ON data_paths.cache_path = caches.cache_path + ) + WHERE endsWith(local_path, 'data.bin') + FORMAT Vertical; Row 1: ────── file_segment_range_begin: 0 file_segment_range_end: 1659 size: 1660 state: DOWNLOADED +SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path 8 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0 +SELECT count(), sum(size) FROM system.filesystem_cache 8 2014 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) +SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 +SYSTEM START MERGES test_02241 +SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' +81715476 +SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' +OPTIMIZE TABLE test_02241 FINAL +SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' +81881872 +SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' +SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 +ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 +SELECT count(), sum(size) FROM system.filesystem_cache 41 250541 +INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) +SYSTEM FLUSH LOGS INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000) 0 +SELECT count() FROM test_02241 5010500 +SELECT count() FROM test_02241 WHERE value LIKE '%010%' 18816 diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 96f61cf61e8..2b237492e98 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -10,13 +10,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) for STORAGE_POLICY in 
's3_cache' 'local_cache'; do echo "Using storage policy: $STORAGE_POLICY" - $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS test_02241" - $CLICKHOUSE_CLIENT --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1" - $CLICKHOUSE_CLIENT --query "SYSTEM STOP MERGES test_02241" + $CLICKHOUSE_CLIENT --echo --query "DROP TABLE IF EXISTS test_02241" + $CLICKHOUSE_CLIENT --echo --query "CREATE TABLE test_02241 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='$STORAGE_POLICY', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false, ratio_of_defaults_for_sparse_serialization = 1" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM STOP MERGES test_02241" - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM DROP FILESYSTEM CACHE" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -32,12 +32,12 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -53,24 +53,24 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo 
--query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT * FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT * FROM test_02241 FORMAT Null" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" + $CLICKHOUSE_CLIENT --echo --query "SELECT * FROM test_02241 FORMAT Null" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM system.filesystem_cache WHERE cache_hits > 0" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) size FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) size FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM DROP FILESYSTEM CACHE" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM DROP FILESYSTEM CACHE" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100, 200)" - $CLICKHOUSE_CLIENT -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state + $CLICKHOUSE_CLIENT --echo -n --query "SELECT file_segment_range_begin, file_segment_range_end, size, state FROM ( SELECT file_segment_range_begin, file_segment_range_end, size, state, local_path @@ -86,27 +86,34 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do WHERE endsWith(local_path, 'data.bin') FORMAT Vertical;" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM (SELECT arrayJoin(cache_paths) AS cache_path, local_path, remote_path FROM system.remote_data_paths ) AS data_paths INNER JOIN system.filesystem_cache AS caches ON data_paths.cache_path = caches.cache_path" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100) SETTINGS enable_filesystem_cache_on_write_operations=0" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT 
--enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(100)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000)" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES test_02241" + $CLICKHOUSE_CLIENT --echo --query "SYSTEM START MERGES test_02241" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" - $CLICKHOUSE_CLIENT --query "SELECT count(), sum(size) FROM system.filesystem_cache" - $CLICKHOUSE_CLIENT --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" + $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" + + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" + $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" + $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(5000000)" + + $CLICKHOUSE_CLIENT --echo --query "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -n --query "SELECT query, ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read @@ -121,6 +128,6 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do DESC LIMIT 1" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241" - $CLICKHOUSE_CLIENT --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM test_02241" + $CLICKHOUSE_CLIENT --echo --query "SELECT count() FROM test_02241 WHERE value LIKE '%010%'" done From a80747b2385647678771281d815867ef87b580f6 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 22 Feb 2024 15:57:10 +0000 Subject: [PATCH 051/985] Undo something to the test --- .../0_stateless/02998_primary_key_skip_columns.reference | 2 +- tests/queries/0_stateless/02998_primary_key_skip_columns.sql | 
2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference index ec44acbd16b..08ccdb83b11 100644 --- a/tests/queries/0_stateless/02998_primary_key_skip_columns.reference +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.reference @@ -15,4 +15,4 @@ Key size: 2400000 120 2334 19 -Key size ok: 1 1 +Key size: 800000 diff --git a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql index 27672d7854e..b2dadcc5e7c 100644 --- a/tests/queries/0_stateless/02998_primary_key_skip_columns.sql +++ b/tests/queries/0_stateless/02998_primary_key_skip_columns.sql @@ -28,6 +28,6 @@ SELECT count() FROM test WHERE b > 7898976344263989848 AND b < 80403209398191531 SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND c > 13239894303140990071 AND c < 16179795840886947236; SELECT count() FROM test WHERE a > 1849813033528774208 AND a < 4594276315503201760 AND b > 7898976344263989848 AND b < 8040320939819153137 AND c > 13239894303140990071 AND c < 16179795840886947236; -SELECT 'Key size ok: ', (sum(primary_key_bytes_in_memory) as s) >= 800000, s < 1200000 FROM system.parts WHERE database = currentDatabase() AND table = 'test'; +SELECT 'Key size: ', round(sum(primary_key_bytes_in_memory), -5) FROM system.parts WHERE database = currentDatabase() AND table = 'test'; DROP TABLE test; From a34f42ca22c8a4820e4cbcf67cdd48a3589e3879 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 13 Jan 2024 18:48:47 +0300 Subject: [PATCH 052/985] Remove lock from the ReadProgressCallback It looks redundant (added in 5ef51ed), though it has "fix tests" in the log message, but CI reports is not available for the commits from that PR [1], so let's try. [1]: https://github.com/ClickHouse/ClickHouse/pull/37543 Also this can be a big problem, since the code under that lock (throttling or quotas with previous implementation that uses boost::atomic_shared_ptr) may sleep. Some numbers: run | time ------------------------|------ max_threads=100 before | 23.1 max_threads=100 after | 15.1 max_threads=4500 before | 4.5 max_threads=4500 after | 2.3 Query: select sum(number) from numbers_mt(2000000) settings max_threads=X, max_block_size = 1 Signed-off-by: Azat Khuzhin --- src/QueryPipeline/ReadProgressCallback.cpp | 2 -- src/QueryPipeline/ReadProgressCallback.h | 1 - tests/performance/small_block_contention.xml | 3 +++ 3 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 tests/performance/small_block_contention.xml diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 59843d8791d..e90fc24d882 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -126,8 +126,6 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c CurrentThread::updatePerformanceCountersIfNeeded(); - std::lock_guard lock(limits_and_quotas_mutex); - /// TODO: Should be done in PipelineExecutor. 
for (const auto & limits : storage_limits) limits.local_limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_stopwatch.elapsedMicroseconds(), limits.local_limits.timeout_overflow_mode); diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index 5dbf3344bdf..7dfed9df5da 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -41,7 +41,6 @@ private: /// The total number of bytes to read. For progress bar. std::atomic_size_t total_bytes = 0; - std::mutex limits_and_quotas_mutex; Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time bool update_profile_events = true; diff --git a/tests/performance/small_block_contention.xml b/tests/performance/small_block_contention.xml new file mode 100644 index 00000000000..ce1995a0a29 --- /dev/null +++ b/tests/performance/small_block_contention.xml @@ -0,0 +1,3 @@ + + select sum(number) from numbers_mt(200000) settings max_threads=100, max_block_size = 1 format Null + From 9cb1ade3e2967507885f5b0e3deefab2ad40082c Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 22 Feb 2024 17:07:47 +0000 Subject: [PATCH 053/985] fix db iterator wait --- src/Common/AsyncLoader.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 20 ++++++++++++++++---- src/Databases/IDatabase.h | 17 +---------------- src/Interpreters/InterpreterDropQuery.cpp | 2 +- 4 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 140194e10b4..4a39454ccbb 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -39,7 +39,7 @@ void logAboutProgress(LoggerPtr log, size_t processed, size_t total, AtomicStopw { if (total && (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))) { - LOG_INFO(log, "Processed: {}%", static_cast(processed * 1000.0 / total) * 0.1); + LOG_INFO(log, "Processed: {:.1f}%", static_cast(processed) * 100.0 / total); watch.restart(); } } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 644bed23350..40e0fb0a0ed 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -289,10 +289,22 @@ void DatabaseOrdinary::stopLoading() DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const { - auto result = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); - std::scoped_lock lock(mutex); - typeid_cast(*result).setLoadTasks(startup_table); - return result; + // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. + // It is important, because otherwise table might be: + // - not attached and thus will be missed in the snapshot; + // - not started, which is not good for DDL operations. 
+ LoadTaskPtrs tasks_to_wait; + { + std::lock_guard lock(mutex); + if (!filter_by_table_name) + tasks_to_wait.reserve(startup_table.size()); + for (const auto & [table_name, task] : startup_table) + if (!filter_by_table_name || filter_by_table_name(table_name)) + tasks_to_wait.emplace_back(task); + } + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); + + return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); } void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index ec380fa759d..75662bfebe3 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -77,17 +77,12 @@ private: Tables tables; Tables::iterator it; - // Tasks to wait before returning a table - using Tasks = std::unordered_map; - Tasks tasks; - protected: DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) noexcept : IDatabaseTablesIterator(std::move(other.database_name)) { size_t idx = std::distance(other.tables.begin(), other.it); std::swap(tables, other.tables); - std::swap(tasks, other.tasks); other.it = other.tables.end(); it = tables.begin(); std::advance(it, idx); @@ -110,17 +105,7 @@ public: const String & name() const override { return it->first; } - const StoragePtr & table() const override - { - if (auto task = tasks.find(it->first); task != tasks.end()) - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), task->second); - return it->second; - } - - void setLoadTasks(const Tasks & tasks_) - { - tasks = tasks_; - } + const StoragePtr & table() const override { return it->second; } }; using DatabaseTablesIteratorPtr = std::unique_ptr; diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 711100b5de1..72aa4cc63e3 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -417,7 +417,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, uuids_to_wait.push_back(table_to_wait); } } - // only if operation is DETACH + // only if operation is DETACH if ((!drop || !truncate) && query.sync) { /// Avoid "some tables are still in use" when sync mode is enabled From 835b47519a7c575d70542e5a37c97dbf5a2b25f9 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 14 Feb 2024 00:44:38 +0100 Subject: [PATCH 054/985] impl --- src/Backups/BackupIO_S3.cpp | 1 + src/Coordination/KeeperSnapshotManagerS3.cpp | 1 + src/Disks/ObjectStorages/S3/diskSettings.cpp | 1 + src/IO/S3/Client.cpp | 19 +++++++++--- src/IO/S3/Client.h | 6 ++++ src/IO/S3/Requests.h | 32 ++++++++++++++++++-- src/IO/S3/URI.cpp | 18 +++++------ src/IO/S3/tests/gtest_aws_s3_client.cpp | 25 ++++++++++++++- src/IO/WriteBufferFromS3.cpp | 15 ++++++++- src/IO/WriteBufferFromS3.h | 1 + src/IO/tests/gtest_s3_uri.cpp | 8 +++++ src/IO/tests/gtest_writebuffer_s3.cpp | 21 +++++++------ src/Storages/StorageS3.cpp | 1 + 13 files changed, 119 insertions(+), 30 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 9359602a651..2063af2061c 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -73,6 +73,7 @@ namespace .use_virtual_addressing = s3_uri.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), + .is_s3express_bucket = 
S3::isS3ExpressEndpoint(s3_uri.endpoint), }; return S3::ClientFactory::instance().create( diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 0337a564660..9779a041095 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -103,6 +103,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo .use_virtual_addressing = new_uri.is_virtual_hosted_style, .disable_checksum = false, .gcs_issue_compose_request = false, + .is_s3express_bucket = S3::isS3ExpressEndpoint(new_uri.endpoint), }; auto client = S3::ClientFactory::instance().create( diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 4fd4b17aabe..b8688cd3de6 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -97,6 +97,7 @@ std::unique_ptr getClient( .use_virtual_addressing = uri.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false), + .is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint), }; return S3::ClientFactory::instance().create( diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 1b6b245b89a..a75d41df3d1 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -304,6 +304,9 @@ Model::HeadObjectOutcome Client::HeadObject(HeadObjectRequest & request) const request.setApiMode(api_mode); + if (isS3ExpressBucket()) + request.setIsS3ExpressBucket(); + addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers); if (auto region = getRegionForBucket(bucket); !region.empty()) @@ -530,7 +533,11 @@ Client::doRequest(RequestType & request, RequestFn request_fn) const addAdditionalAMZHeadersToCanonicalHeadersList(request, client_configuration.extra_headers); const auto & bucket = request.GetBucket(); request.setApiMode(api_mode); - if (client_settings.disable_checksum) + + /// We have to use checksums for S3Express buckets, so the order of checks should be the following + if (client_settings.is_s3express_bucket) + request.setIsS3ExpressBucket(); + else if (client_settings.disable_checksum) request.disableChecksum(); if (auto region = getRegionForBucket(bucket); !region.empty()) @@ -915,9 +922,9 @@ std::unique_ptr ClientFactory::create( // NOLINT std::move(sse_kms_config), credentials_provider, client_configuration, // Client configuration. - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - client_settings - ); + client_settings.is_s3express_bucket ? 
Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::RequestDependent + : Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + client_settings); } PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT @@ -956,6 +963,10 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT return config; } +bool isS3ExpressEndpoint(const std::string & endpoint) +{ + return endpoint.contains("s3express"); +} } } diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index 8da21bd2c2c..c7bc727bf32 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -92,6 +92,8 @@ private: std::unordered_map> client_caches; }; +bool isS3ExpressEndpoint(const std::string & endpoint); + struct ClientSettings { bool use_virtual_addressing; @@ -107,6 +109,7 @@ struct ClientSettings /// Ability to enable it preserved since likely it is required for old /// files. bool gcs_issue_compose_request; + bool is_s3express_bucket; }; /// Client that improves the client from the AWS SDK @@ -208,6 +211,9 @@ public: const std::shared_ptr& httpRequest) const override; bool supportsMultiPartCopy() const; + + bool isS3ExpressBucket() const { return client_settings.is_s3express_bucket; } + private: friend struct ::MockS3::Client; diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index bfb94a5a67e..6f82a0f39d3 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -21,12 +21,32 @@ #include #include #include +#include +#include + +#include namespace DB::S3 { namespace Model = Aws::S3::Model; +/// Used only for S3Express +namespace RequestChecksum +{ +inline void setPartChecksum(Model::CompletedPart & part, const std::string & checksum) +{ + part.SetChecksumCRC32(checksum); +} + +template +inline void setChecksumAlgorithm(R & request) +{ + if constexpr (requires { request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32); }) + request.SetChecksumAlgorithm(Model::ChecksumAlgorithm::CRC32); +} +}; + template class ExtendedRequest : public BaseRequest { @@ -49,11 +69,13 @@ public: Aws::String GetChecksumAlgorithmName() const override { + chassert(!is_s3express_bucket || checksum); + /// Return empty string is enough to disable checksums (see /// AWSClient::AddChecksumToRequest [1] for more details). /// /// [1]: https://github.com/aws/aws-sdk-cpp/blob/b0ee1c0d336dbb371c34358b68fba6c56aae2c92/src/aws-cpp-sdk-core/source/client/AWSClient.cpp#L783-L839 - if (!checksum) + if (!is_s3express_bucket && !checksum) return ""; return BaseRequest::GetChecksumAlgorithmName(); } @@ -84,9 +106,12 @@ public: } /// Disable checksum to avoid extra read of the input stream - void disableChecksum() const + void disableChecksum() const { checksum = false; } + + void setIsS3ExpressBucket() { - checksum = false; + is_s3express_bucket = true; + RequestChecksum::setChecksumAlgorithm(*this); } protected: @@ -94,6 +119,7 @@ protected: mutable std::optional uri_override; mutable ApiMode api_mode{ApiMode::AWS}; mutable bool checksum = true; + bool is_s3express_bucket = false; }; class CopyObjectRequest : public ExtendedRequest diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 23f59420bfe..062d3b80850 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -35,7 +35,7 @@ URI::URI(const std::string & uri_) /// Case when bucket name represented in domain name of S3 URL. /// E.g. 
(https://bucket-name.s3.Region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access - static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); + static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key) @@ -43,6 +43,7 @@ URI::URI(const std::string & uri_) static const RE2 path_style_pattern("^/([^/]*)/(.*)"); static constexpr auto S3 = "S3"; + static constexpr auto S3EXPRESS = "S3EXPRESS"; static constexpr auto COSN = "COSN"; static constexpr auto COS = "COS"; static constexpr auto OBS = "OBS"; @@ -115,21 +116,16 @@ URI::URI(const std::string & uri_) } boost::to_upper(name); - if (name != S3 && name != COS && name != OBS && name != OSS && name != EOS) + /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info + if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); - if (name == S3) - storage_name = name; - else if (name == OBS) - storage_name = OBS; - else if (name == OSS) - storage_name = OSS; - else if (name == EOS) - storage_name = EOS; - else + if (name == COS || name == COSN) storage_name = COSN; + else + storage_name = name; } else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) { diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 33917314bca..8edbe12a22f 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -110,7 +110,8 @@ void testServerSideEncryption( bool disable_checksum, String server_side_encryption_customer_key_base64, DB::S3::ServerSideEncryptionKMSConfig sse_kms_config, - String expected_headers) + String expected_headers, + bool is_s3express_bucket = false) { TestPocoHTTPServer http; @@ -144,6 +145,7 @@ void testServerSideEncryption( .use_virtual_addressing = uri.is_virtual_hosted_style, .disable_checksum = disable_checksum, .gcs_issue_compose_request = false, + .is_s3express_bucket = is_s3express_bucket, }; std::shared_ptr client = DB::S3::ClientFactory::instance().create( @@ -295,4 +297,25 @@ TEST(IOTestAwsS3Client, AppendExtraSSEKMSHeadersWrite) "x-amz-server-side-encryption-context: arn:aws:s3:::bucket_ARN\n"); } +TEST(IOTestAwsS3Client, ChecksumHeaderIsPresentForS3Express) +{ + /// See https://github.com/ClickHouse/ClickHouse/pull/19748 + testServerSideEncryption( + doWriteRequest, + /* disable_checksum= */ true, + "", + {}, + "authorization: ... 
SignedHeaders=" + "amz-sdk-invocation-id;" + "amz-sdk-request;" + "content-length;" + "content-type;" + "host;" + "x-amz-checksum-crc32;" + "x-amz-content-sha256;" + "x-amz-date;" + "x-amz-sdk-checksum-algorithm, ...\n", + /*is_s3express_bucket=*/true); +} + #endif diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 5bb01050591..a162992278f 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -18,7 +18,9 @@ #include #include +#include #include +#include #include @@ -456,6 +458,14 @@ S3::UploadPartRequest WriteBufferFromS3::getUploadRequest(size_t part_number, Pa /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 req.SetContentType("binary/octet-stream"); + /// Checksums need to be provided on CompleteMultipartUpload requests, so we calculate then manually and store in multipart_checksums + if (client_ptr->isS3ExpressBucket()) + { + chassert(req.GetChecksumAlgorithm() == Aws::S3::Model::ChecksumAlgorithm::CRC32); + req.SetChecksumCRC32(Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateCRC32(*(req.GetBody())))); + multipart_checksums.push_back(req.GetChecksumCRC32()); + } + return req; } @@ -575,7 +585,10 @@ void WriteBufferFromS3::completeMultipartUpload() for (size_t i = 0; i < multipart_tags.size(); ++i) { Aws::S3::Model::CompletedPart part; - multipart_upload.AddParts(part.WithETag(multipart_tags[i]).WithPartNumber(static_cast(i + 1))); + part.WithETag(multipart_tags[i]).WithPartNumber(static_cast(i + 1)); + if (!multipart_checksums.empty()) + S3::RequestChecksum::setPartChecksum(part, multipart_checksums.at(i)); + multipart_upload.AddParts(part); } req.SetMultipartUpload(multipart_upload); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 5dc269990a1..148cd27f854 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -100,6 +100,7 @@ private: /// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. 
String multipart_upload_id; std::deque multipart_tags; + std::deque multipart_checksums; bool multipart_upload_finished = false; /// Track that prefinalize() is called only once diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index c088e41f1e8..5bf0dfb962d 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -162,6 +162,14 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ("", uri.version_id); ASSERT_EQ(false, uri.is_virtual_hosted_style); } + { + S3::URI uri("https://test-perf-bucket--eun1-az1--x-s3.s3express-eun1-az1.eu-north-1.amazonaws.com/test.csv"); + ASSERT_EQ("https://s3express-eun1-az1.eu-north-1.amazonaws.com", uri.endpoint); + ASSERT_EQ("test-perf-bucket--eun1-az1--x-s3", uri.bucket); + ASSERT_EQ("test.csv", uri.key); + ASSERT_EQ("", uri.version_id); + ASSERT_EQ(true, uri.is_virtual_hosted_style); + } } TEST_P(S3UriTest, invalidPatterns) diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index ae00bb2e9e2..d9cb486c09e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -205,16 +205,17 @@ struct Client : DB::S3::Client { explicit Client(std::shared_ptr mock_s3_store) : DB::S3::Client( - 100, - DB::S3::ServerSideEncryptionKMSConfig(), - std::make_shared("", ""), - GetClientConfiguration(), - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, - DB::S3::ClientSettings{ - .use_virtual_addressing = true, - .disable_checksum= false, - .gcs_issue_compose_request = false, - }) + 100, + DB::S3::ServerSideEncryptionKMSConfig(), + std::make_shared("", ""), + GetClientConfiguration(), + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, + DB::S3::ClientSettings{ + .use_virtual_addressing = true, + .disable_checksum = false, + .gcs_issue_compose_request = false, + .is_s3express_bucket = false, + }) , store(mock_s3_store) {} diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d8ef3df1c8..044a1ca5362 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1427,6 +1427,7 @@ void StorageS3::Configuration::connect(const ContextPtr & context) .use_virtual_addressing = url.is_virtual_hosted_style, .disable_checksum = local_settings.s3_disable_checksum, .gcs_issue_compose_request = context->getConfigRef().getBool("s3.gcs_issue_compose_request", false), + .is_s3express_bucket = S3::isS3ExpressEndpoint(url.endpoint), }; auto credentials = Aws::Auth::AWSCredentials(auth_settings.access_key_id, auth_settings.secret_access_key, auth_settings.session_token); From 18741f122eabaeb7903f355958af1e1a88818e83 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Feb 2024 12:42:13 +0800 Subject: [PATCH 055/985] Move a setting to server setting --- src/Core/ServerSettings.h | 2 ++ src/Core/Settings.h | 1 - src/Interpreters/Context.cpp | 2 +- .../config.d/force_read_through_cache_for_merges.xml | 3 +++ tests/integration/test_filesystem_cache/test.py | 2 +- .../users.d/force_read_through_cache_on_merge.xml | 7 ------- 6 files changed, 7 insertions(+), 10 deletions(-) create mode 100644 tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml delete mode 100644 tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index de2a4e9b755..0283b98638f 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -103,6 +103,8 @@ namespace DB M(Bool, async_load_databases, false, "Enable 
asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ \ + M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ + \ M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index db060bf712d..433195af9c3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -769,7 +769,6 @@ class IColumn; M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \ M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \ M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, but don't put more entries into the cache. If you set this setting for heavy ad-hoc queries and leave it disabled for short real-time queries, this will allows to avoid cache threshing by too heavy queries and to improve the overall system efficiency.", 0) \ - M(Bool, force_read_through_cache_for_merges, false, "Force read-through cache for merges", 0) \ M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if exceeds query cache size", 0) \ M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \ M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 36b362e36bb..a974eaca067 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5079,7 +5079,7 @@ ReadSettings Context::getReadSettings() const res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; - res.force_read_through_cache_merges = settings.force_read_through_cache_for_merges; + res.force_read_through_cache_merges = getServerSettings().force_read_through_cache_for_merges; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; diff --git a/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml b/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml new file mode 100644 index 00000000000..bb2a6e850a4 --- /dev/null +++ 
b/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml @@ -0,0 +1,3 @@ + + 1 + diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index c1ba6702dcf..f32fa4e9823 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -42,9 +42,9 @@ def cluster(): "node_force_read_through_cache_on_merge", main_configs=[ "config.d/storage_conf.xml", + "config.d/force_read_through_cache_for_merges.xml", ], user_configs=[ - "users.d/force_read_through_cache_on_merge.xml", "users.d/cache_on_write_operations.xml", ], stay_alive=True, diff --git a/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml b/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml deleted file mode 100644 index 4d26a1a8bc7..00000000000 --- a/tests/integration/test_filesystem_cache/users.d/force_read_through_cache_on_merge.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - From bf5affbe640976d2b73e12f5213a13baacf40619 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Feb 2024 16:37:09 +0800 Subject: [PATCH 056/985] Fix test --- .../02241_filesystem_cache_on_write_operations.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh index 2b237492e98..ee1d942a421 100755 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.sh @@ -99,14 +99,8 @@ for STORAGE_POLICY in 's3_cache' 'local_cache'; do $CLICKHOUSE_CLIENT --echo --query "SYSTEM START MERGES test_02241" - $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" - $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" - $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --query "OPTIMIZE TABLE test_02241 FINAL" - $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" - $CLICKHOUSE_CLIENT --echo --query "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" - $CLICKHOUSE_CLIENT --echo --query "SELECT count(), sum(size) FROM system.filesystem_cache" $CLICKHOUSE_CLIENT --echo --enable_filesystem_cache_on_write_operations=1 --mutations_sync=2 --query "ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100" From 277e8d965555b4fcd09a755282666bcae36adae6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 26 Feb 2024 14:03:53 +0800 Subject: [PATCH 057/985] Fix usage plain metadata type with new configuration option --- src/Disks/DiskType.cpp | 48 +++++++++++++++++++ src/Disks/DiskType.h | 34 +------------ src/Disks/ObjectStorages/IObjectStorage.h | 1 + .../ObjectStorages/MetadataStorageFactory.cpp | 36 +++++++++++--- .../ObjectStorages/MetadataStorageFactory.h | 7 +++ .../ObjectStorages/ObjectStorageFactory.cpp | 43 +++++++++++++---- src/Disks/ObjectStorages/PlainObjectStorage.h | 29 +++++++++++ .../RegisterDiskObjectStorage.cpp | 24 ++-------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 21 -------- .../configs/disk_s3.xml | 7 +++ .../test_attach_backup_from_s3_plain/test.py | 25 ++++++---- 11 files changed, 178 insertions(+), 97 deletions(-) create mode 100644 
src/Disks/ObjectStorages/PlainObjectStorage.h diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp index 218b6ee7f26..1778ae8025b 100644 --- a/src/Disks/DiskType.cpp +++ b/src/Disks/DiskType.cpp @@ -1,7 +1,27 @@ #include "DiskType.h" +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_ELEMENT_IN_CONFIG; +} + +MetadataStorageType metadataTypeFromString(const String & type) +{ + auto check_type = Poco::toLower(type); + if (check_type == "local") + return MetadataStorageType::Local; + if (check_type == "plain") + return MetadataStorageType::Plain; + if (check_type == "web") + return MetadataStorageType::StaticWeb; + + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, + "MetadataStorageFactory: unknown metadata storage type: {}", type); +} bool DataSourceDescription::operator==(const DataSourceDescription & other) const { @@ -14,4 +34,32 @@ bool DataSourceDescription::sameKind(const DataSourceDescription & other) const == std::tie(other.type, other.object_storage_type, other.description); } +std::string DataSourceDescription::toString() const +{ + switch (type) + { + case DataSourceType::Local: + return "local"; + case DataSourceType::RAM: + return "memory"; + case DataSourceType::ObjectStorage: + { + switch (object_storage_type) + { + case ObjectStorageType::S3: + return "s3"; + case ObjectStorageType::HDFS: + return "hdfs"; + case ObjectStorageType::Azure: + return "azure_blob_storage"; + case ObjectStorageType::Local: + return "local_blob_storage"; + case ObjectStorageType::Web: + return "web"; + case ObjectStorageType::None: + return "none"; + } + } + } +} } diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 15940ea9155..36fe4d83004 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -17,7 +17,6 @@ enum class ObjectStorageType { None, S3, - S3_Plain, Azure, HDFS, Web, @@ -30,9 +29,9 @@ enum class MetadataStorageType Local, Plain, StaticWeb, - Memory, }; +MetadataStorageType metadataTypeFromString(const String & type); String toString(DataSourceType data_source_type); struct DataSourceDescription @@ -49,36 +48,7 @@ struct DataSourceDescription bool operator==(const DataSourceDescription & other) const; bool sameKind(const DataSourceDescription & other) const; - std::string toString() const - { - switch (type) - { - case DataSourceType::Local: - return "local"; - case DataSourceType::RAM: - return "memory"; - case DataSourceType::ObjectStorage: - { - switch (object_storage_type) - { - case ObjectStorageType::S3: - return "s3"; - case ObjectStorageType::S3_Plain: - return "s3_plain"; - case ObjectStorageType::HDFS: - return "hdfs"; - case ObjectStorageType::Azure: - return "azure_blob_storage"; - case ObjectStorageType::Local: - return "local_blob_storage"; - case ObjectStorageType::Web: - return "web"; - case ObjectStorageType::None: - return "none"; - } - } - } - } + std::string toString() const; }; } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 56c269a3fc5..fde97d82ad1 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -218,6 +218,7 @@ public: virtual bool isReadOnly() const { return false; } virtual bool isWriteOnce() const { return false; } + virtual bool isPlain() const { return false; } virtual bool supportParallelWrite() const { return false; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index 52a0b9ec268..adc1f84372c 100644 
--- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -32,6 +32,35 @@ void MetadataStorageFactory::registerMetadataStorageType(const std::string & met } } +std::string MetadataStorageFactory::getCompatibilityMetadataTypeHint(const ObjectStorageType & type) +{ + switch (type) + { + case ObjectStorageType::S3: + case ObjectStorageType::HDFS: + case ObjectStorageType::Local: + case ObjectStorageType::Azure: + return "local"; + case ObjectStorageType::Web: + return "web"; + default: + return ""; + } +} + +std::string MetadataStorageFactory::getMetadataType( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const std::string & compatibility_type_hint) +{ + if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config"); + } + + return config.getString(config_prefix + ".metadata_type", compatibility_type_hint); +} + MetadataStoragePtr MetadataStorageFactory::create( const std::string & name, const Poco::Util::AbstractConfiguration & config, @@ -39,12 +68,7 @@ MetadataStoragePtr MetadataStorageFactory::create( ObjectStoragePtr object_storage, const std::string & compatibility_type_hint) const { - if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config"); - } - - const auto type = config.getString(config_prefix + ".metadata_type", compatibility_type_hint); + const auto type = getMetadataType(config, config_prefix, compatibility_type_hint); const auto it = registry.find(type); if (it == registry.end()) diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.h b/src/Disks/ObjectStorages/MetadataStorageFactory.h index 5f61125c599..467cd3cef98 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.h +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.h @@ -25,6 +25,13 @@ public: ObjectStoragePtr object_storage, const std::string & compatibility_type_hint) const; + static std::string getMetadataType( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const std::string & compatibility_type_hint = ""); + + static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type); + private: using Registry = std::unordered_map; Registry registry; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index b3626135177..6f6ff199902 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -16,8 +16,10 @@ #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include +#include #include #endif +#include #include #include @@ -32,6 +34,28 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template + ObjectStoragePtr createObjectStorage( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Args && ...args) + { + auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(ObjectStorageType::S3); + auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); + + if (metadataTypeFromString(metadata_type) == MetadataStorageType::Plain) + { + return std::make_shared>(std::forward(args)...); + } + else + { + return std::make_shared(std::forward(args)...); + } + } +} + 
ObjectStorageFactory & ObjectStorageFactory::instance() { static ObjectStorageFactory factory; @@ -129,12 +153,12 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto client = getClient(config, config_prefix, context, *settings); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); - auto object_storage = std::make_shared( - std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + auto object_storage = createObjectStorage( + config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name, uri.key); return object_storage; }); @@ -165,12 +189,12 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto client = getClient(config, config_prefix, context, *settings); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); - auto object_storage = std::make_shared( + auto object_storage = std::make_shared>( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name, uri.key); return object_storage; }); @@ -198,7 +222,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return std::make_unique(uri, std::move(settings), config); + return createObjectStorage(config, config_prefix, uri, std::move(settings), config); }); } #endif @@ -214,7 +238,8 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { String container_name = config.getString(config_prefix + ".container_name", "default-container"); - return std::make_unique( + return createObjectStorage( + config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), @@ -248,7 +273,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory) ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. Error: {}", uri, e.what()); } - return std::make_shared(uri, context); + return createObjectStorage(config, config_prefix, uri, context); }); } @@ -266,7 +291,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes); /// keys are mapped to the fs, object_key_prefix is a directory also fs::create_directories(object_key_prefix); - return std::make_shared(object_key_prefix); + return createObjectStorage(config, config_prefix, object_key_prefix); }); } #endif diff --git a/src/Disks/ObjectStorages/PlainObjectStorage.h b/src/Disks/ObjectStorages/PlainObjectStorage.h new file mode 100644 index 00000000000..3a81b85c44b --- /dev/null +++ b/src/Disks/ObjectStorages/PlainObjectStorage.h @@ -0,0 +1,29 @@ +#pragma once +#include + +namespace DB +{ + +/// Do not encode keys, store as-is, and do not require separate disk for metadata. +/// But because of this does not support renames/hardlinks/attrs/... +/// +/// NOTE: This disk has excessive API calls. 
+template +class PlainObjectStorage : public BaseObjectStorage +{ +public: + template + explicit PlainObjectStorage(Args && ...args) + : BaseObjectStorage(std::forward(args)...) {} + + std::string getName() const override { return "" + BaseObjectStorage::getName(); } + + /// Notes: + /// - supports BACKUP to this disk + /// - does not support INSERT into MergeTree table on this disk + bool isWriteOnce() const override { return true; } + + bool isPlain() const override { return true; } +}; + +} diff --git a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp index 383a0b079b5..669a0102951 100644 --- a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp @@ -10,25 +10,6 @@ namespace DB void registerObjectStorages(); void registerMetadataStorages(); -static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type) -{ - switch (type) - { - case ObjectStorageType::S3: - case ObjectStorageType::HDFS: - case ObjectStorageType::Local: - case ObjectStorageType::Azure: - return "local"; - case ObjectStorageType::S3_Plain: - return "plain"; - case ObjectStorageType::Web: - return "web"; - case ObjectStorageType::None: - return ""; - } - UNREACHABLE(); -} - void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_check) { registerObjectStorages(); @@ -47,7 +28,10 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch std::string compatibility_metadata_type_hint; if (!config.has(config_prefix + ".metadata_type")) { - compatibility_metadata_type_hint = getCompatibilityMetadataTypeHint(object_storage->getType()); + if (object_storage->isPlain()) + compatibility_metadata_type_hint = "plain"; + else + compatibility_metadata_type_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(object_storage->getType()); } auto metadata_storage = MetadataStorageFactory::instance().create( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ab0fa5bed68..4ece98c5ec4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -182,27 +182,6 @@ private: LoggerPtr log; }; -/// Do not encode keys, store as-is, and do not require separate disk for metadata. -/// But because of this does not support renames/hardlinks/attrs/... -/// -/// NOTE: This disk has excessive API calls. -class S3PlainObjectStorage : public S3ObjectStorage -{ -public: - std::string getName() const override { return "S3PlainObjectStorage"; } - - template - explicit S3PlainObjectStorage(Args && ...args) - : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) {} - - ObjectStorageType getType() const override { return ObjectStorageType::S3_Plain; } - - /// Notes: - /// - supports BACKUP to this disk - /// - does not support INSERT into MergeTree table on this disk - bool isWriteOnce() const override { return true; } -}; - } #endif diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 779e4b6ae21..3166eea7ccb 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -8,9 +8,16 @@ minio minio123 + + object_storage + local + plain + local_plain/ +
backup_disk_s3_plain + backup_disk_local_plain diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index e575c487b7a..4a8da1e6d66 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -20,17 +20,27 @@ def start_cluster(): finally: cluster.shutdown() +s3_disk_def = """disk(type=s3_plain, + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', + access_key_id='minio', + secret_access_key='minio123');""" + +local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" @pytest.mark.parametrize( - "table_name,backup_name,storage_policy,min_bytes_for_wide_part", + "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact", "s3_backup_compact", int(1e9), id="compact" + "compact", "backup_compact_s3", "backup_disk_s3_plain", s3_disk_def, int(1e9), id="compact" ), - pytest.param("wide", "backup_wide", "s3_backup_wide", int(0), id="wide"), + pytest.param("wide", "backup_wide_s3", "backup_disk_s3_plain", s3_disk_def, int(0), id="wide"), + pytest.param( + "compact", "backup_compact_local", "backup_disk_local_plain", local_disk_def, int(1e9), id="compact" + ), + pytest.param("wide", "backup_wide_local", "backup_disk_local_plain", local_disk_def, int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide_part): +def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -45,7 +55,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide settings min_bytes_for_wide_part={min_bytes_for_wide_part} as select number%5 part, number key from numbers(100); - backup table ordinary_db.{table_name} TO Disk('backup_disk_s3_plain', '{backup_name}') settings deduplicate_files=0; + backup table ordinary_db.{table_name} TO Disk('{storage_policy}', '{backup_name}') settings deduplicate_files=0; drop table ordinary_db.{table_name}; attach table ordinary_db.{table_name} (part UInt8, key UInt64) @@ -53,10 +63,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide order by key partition by part settings max_suspicious_broken_parts=0, - disk=disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', - access_key_id='minio', - secret_access_key='minio123'); + disk={disk_def} """ ) From 69b5bd02a915ae044b4116de759d11ae80525dc5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Feb 2024 09:37:17 +0000 Subject: [PATCH 058/985] Automatic style fix --- .../test_attach_backup_from_s3_plain/test.py | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 4a8da1e6d66..900366b2c9c 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -20,27 +20,57 @@ def start_cluster(): finally: cluster.shutdown() + s3_disk_def = """disk(type=s3_plain, endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 
'plain'" +local_disk_def = ( + "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" +) + @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact_s3", "backup_disk_s3_plain", s3_disk_def, int(1e9), id="compact" + "compact", + "backup_compact_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(1e9), + id="compact", ), - pytest.param("wide", "backup_wide_s3", "backup_disk_s3_plain", s3_disk_def, int(0), id="wide"), pytest.param( - "compact", "backup_compact_local", "backup_disk_local_plain", local_disk_def, int(1e9), id="compact" + "wide", + "backup_wide_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(0), + id="wide", + ), + pytest.param( + "compact", + "backup_compact_local", + "backup_disk_local_plain", + local_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_local", + "backup_disk_local_plain", + local_disk_def, + int(0), + id="wide", ), - pytest.param("wide", "backup_wide_local", "backup_disk_local_plain", local_disk_def, int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): +def test_attach_part( + table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part +): node.query( f""" -- Catch any errors (NOTE: warnings are ok) From ac4af6a4ad3b67860eae79b2ed3320fc5981a954 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 26 Feb 2024 19:58:49 +0000 Subject: [PATCH 059/985] Don't allow to set max_parallel_replicas to 0 as it doesn't make sense --- src/Client/ConnectionPoolWithFailover.cpp | 9 +++++++++ src/Client/HedgedConnectionsFactory.cpp | 3 +++ src/Client/HedgedConnectionsFactory.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Planner/PlannerJoinTree.cpp | 4 ++-- .../03001_max_parallel_replicas_zero_value.reference | 0 .../03001_max_parallel_replicas_zero_value.sql | 5 +++++ 7 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference create mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 492fd4ae9e2..46b9741c812 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -191,11 +191,20 @@ std::vector ConnectionPoolWithFailover::g max_entries = nested_pools.size(); } else if (pool_mode == PoolMode::GET_ONE) + { max_entries = 1; + } else if (pool_mode == PoolMode::GET_MANY) + { + if (settings.max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + max_entries = settings.max_parallel_replicas; + } else + { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); + } if (!priority_func) priority_func = makeGetPriorityFunc(settings); diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index f5b074a0257..a4e5dbf04ac 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -82,6 +82,9 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { + if (max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + max_entries = max_parallel_replicas; 
break; } diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index ce7b553acdd..dd600d58e1e 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -158,7 +158,7 @@ private: /// checking the number of requested replicas that are still in process). size_t requested_connections_count = 0; - const size_t max_parallel_replicas = 0; + const size_t max_parallel_replicas = 1; const bool skip_unavailable_shards = 0; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d34294b4c4b..fe5e5dc69d1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -871,7 +871,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() { /// The query could use trivial count if it didn't use parallel replicas, so let's disable it and reanalyze context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas to be able to use a trivial count optimization"); return true; } @@ -909,7 +909,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() if (number_of_replicas_to_use <= 1) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read"); return true; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index e6a459d0e8a..2b1cd7fb353 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -295,7 +295,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - query_context->setSetting("max_parallel_replicas", UInt64{0}); + query_context->setSetting("max_parallel_replicas", UInt64{1}); LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -756,7 +756,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql new file mode 100644 index 00000000000..611aa4777ba --- /dev/null +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -0,0 +1,5 @@ +drop table if exists test_d; +create table test_d 
engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} +drop table test_d; + From 8aa9f36484bbe814a1e3edccc608e71b73915857 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 26 Feb 2024 22:05:54 +0100 Subject: [PATCH 060/985] Fix style --- src/Client/ConnectionPoolWithFailover.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 46b9741c812..ad8ed0067d8 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int BAD_ARGUMENTS; } From f264f0a0360baf1413ec38d3f3f30c70595064f4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 26 Feb 2024 22:06:10 +0100 Subject: [PATCH 061/985] Fix style --- src/Client/HedgedConnectionsFactory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index a4e5dbf04ac..16a03a696bd 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( From f53f43b78d3cf2da6219ea4bdea7018d9811ae54 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:33:47 +0800 Subject: [PATCH 062/985] Fixes for LocalObjectStorage and plain metadata --- .../Local/LocalObjectStorage.cpp | 37 +++++++++++++++++-- .../ObjectStorages/Local/LocalObjectStorage.h | 4 ++ .../MetadataStorageFromPlainObjectStorage.cpp | 5 +-- .../ObjectStorages/ObjectStorageFactory.cpp | 31 ++++++++++------ src/Disks/ObjectStorages/PlainObjectStorage.h | 6 +++ src/Disks/ObjectStorages/S3/DiskS3Utils.cpp | 6 --- src/Disks/ObjectStorages/S3/DiskS3Utils.h | 1 - .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 + .../configs/disk_s3.xml | 4 +- .../test_attach_backup_from_s3_plain/test.py | 7 ++-- 10 files changed, 71 insertions(+), 32 deletions(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 02700b358e0..51c260cc270 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -31,6 +31,8 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_) description = *block_device_id; else description = "/"; + + fs::create_directories(getCommonKeyPrefix()); } bool LocalObjectStorage::exists(const StoredObject & object) const @@ -53,6 +55,7 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL return createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); }; + LOG_TEST(log, "Read object: {}", objects[0].remote_path); switch (read_settings.remote_fs_method) { case RemoteFSReadMethod::read: @@ -111,8 +114,8 @@ std::unique_ptr LocalObjectStorage::readObject( /// NOLI if (!file_size) file_size = tryGetSizeFromFilePath(path); - LOG_TEST(log, "Read object: {}", path); - return createReadBufferFromFileBase(path, patchSettings(read_settings), read_hint, file_size); + 
LOG_TEST(log, "Read object: {}", object.remote_path); + return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size); } std::unique_ptr LocalObjectStorage::writeObject( /// NOLINT @@ -126,6 +129,7 @@ std::unique_ptr LocalObjectStorage::writeObject( /// NO throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files"); LOG_TEST(log, "Write object: {}", object.remote_path); + fs::create_directories(fs::path(object.remote_path).parent_path()); return std::make_unique(object.remote_path, buf_size); } @@ -157,9 +161,34 @@ void LocalObjectStorage::removeObjectsIfExist(const StoredObjects & objects) removeObjectIfExists(object); } -ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path */) const +ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for LocalObjectStorage"); + ObjectMetadata object_metadata; + LOG_TEST(log, "Getting metadata for path: {}", path); + object_metadata.size_bytes = fs::file_size(path); + object_metadata.last_modified = Poco::Timestamp::fromEpochTime( + std::chrono::duration_cast(fs::last_write_time(path).time_since_epoch()).count()); + return object_metadata; +} + +void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const +{ + for (const auto & entry : fs::directory_iterator(path)) + { + if (entry.is_directory()) + { + listObjects(entry.path(), children, 0); + continue; + } + + auto metadata = getObjectMetadata(entry.path()); + children.emplace_back(entry.path(), std::move(metadata)); + } +} + +bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const +{ + return exists(StoredObject(path)); } void LocalObjectStorage::copyObject( // NOLINT diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index ed5f8c1f537..22429a99c76 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -58,6 +58,10 @@ public: ObjectMetadata getObjectMetadata(const std::string & path) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + + bool existsOrHasAnyChild(const std::string & path) const override; + void copyObject( /// NOLINT const StoredObject & object_from, const StoredObject & object_to, diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index b03809f5b39..4b8fc74e956 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -48,10 +48,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path std::string directory = object_key.serialize(); if (!directory.ends_with('/')) directory += '/'; - - RelativePathsWithMetadata files; - object_storage->listObjects(directory, files, 1); - return !files.empty(); + return object_storage->existsOrHasAnyChild(directory); } uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 6f6ff199902..f64c42c1403 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ 
b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -36,16 +36,24 @@ namespace ErrorCodes namespace { + bool isPlainStorage( + ObjectStorageType type, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix) + { + auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(type); + auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); + return metadataTypeFromString(metadata_type) == MetadataStorageType::Plain; + } + template ObjectStoragePtr createObjectStorage( + ObjectStorageType type, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Args && ...args) { - auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(ObjectStorageType::S3); - auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); - - if (metadataTypeFromString(metadata_type) == MetadataStorageType::Plain) + if (isPlainStorage(type, config, config_prefix)) { return std::make_shared>(std::forward(args)...); } @@ -151,10 +159,10 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); auto client = getClient(config, config_prefix, context, *settings); - auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); + auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = createObjectStorage( - config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + ObjectStorageType::S3, config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) @@ -187,7 +195,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); auto client = getClient(config, config_prefix, context, *settings); - auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); + auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = std::make_shared>( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); @@ -222,7 +230,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return createObjectStorage(config, config_prefix, uri, std::move(settings), config); + return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); }); } #endif @@ -239,8 +247,7 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) { String container_name = config.getString(config_prefix + ".container_name", "default-container"); return createObjectStorage( - config, config_prefix, - name, + ObjectStorageType::Azure, config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), container_name); @@ -273,7 +280,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory) ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. 
Error: {}", uri, e.what()); } - return createObjectStorage(config, config_prefix, uri, context); + return createObjectStorage(ObjectStorageType::Web, config, config_prefix, uri, context); }); } @@ -291,7 +298,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes); /// keys are mapped to the fs, object_key_prefix is a directory also fs::create_directories(object_key_prefix); - return createObjectStorage(config, config_prefix, object_key_prefix); + return createObjectStorage(ObjectStorageType::Local, config, config_prefix, object_key_prefix); }); } #endif diff --git a/src/Disks/ObjectStorages/PlainObjectStorage.h b/src/Disks/ObjectStorages/PlainObjectStorage.h index 3a81b85c44b..e0907d0b4d8 100644 --- a/src/Disks/ObjectStorages/PlainObjectStorage.h +++ b/src/Disks/ObjectStorages/PlainObjectStorage.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -24,6 +25,11 @@ public: bool isWriteOnce() const override { return true; } bool isPlain() const override { return true; } + + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + { + return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path); + } }; } diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp index bb7b53b2d22..4b889f89f90 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp @@ -15,16 +15,10 @@ namespace ErrorCodes } ObjectStorageKeysGeneratorPtr getKeyGenerator( - String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - if (type == "s3_plain") - return createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key); - - chassert(type == "s3"); - bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting(); bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.h b/src/Disks/ObjectStorages/S3/DiskS3Utils.h index 29e39d4bc1b..8524a9ccac3 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.h +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.h @@ -12,7 +12,6 @@ namespace DB namespace S3 { struct URI; } ObjectStorageKeysGeneratorPtr getKeyGenerator( - String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5771eb1ebe0..b2a9ab8fdc3 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -561,6 +561,8 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const { + if (!key_generator) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); return key_generator->generate(path); } diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 3166eea7ccb..2edabc76c8b 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -10,9 +10,9 @@ object_storage - local + local_blob_storage plain - local_plain/ + /local_plain/ diff --git 
a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 4a8da1e6d66..983275cc24f 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -21,11 +21,11 @@ def start_cluster(): cluster.shutdown() s3_disk_def = """disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{}/', access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/')" @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", @@ -41,6 +41,7 @@ local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metad ], ) def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): + disk_definition = disk_def.format(backup_name) node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -63,7 +64,7 @@ def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_byte order by key partition by part settings max_suspicious_broken_parts=0, - disk={disk_def} + disk={disk_definition} """ ) From fb38bd139c433ead685028f232e8c4fad5e566d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:38:02 +0800 Subject: [PATCH 063/985] Remove debug logging --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 51c260cc270..4ec998a2bb0 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -55,7 +55,6 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL return createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); }; - LOG_TEST(log, "Read object: {}", objects[0].remote_path); switch (read_settings.remote_fs_method) { case RemoteFSReadMethod::read: @@ -109,10 +108,8 @@ std::unique_ptr LocalObjectStorage::readObject( /// NOLI std::optional read_hint, std::optional file_size) const { - const auto & path = object.remote_path; - if (!file_size) - file_size = tryGetSizeFromFilePath(path); + file_size = tryGetSizeFromFilePath(object.remote_path); LOG_TEST(log, "Read object: {}", object.remote_path); return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size); From 978fe9fa1a069a231bb52c66b3898c6ce112a215 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:43:34 +0800 Subject: [PATCH 064/985] Add comments --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 4ec998a2bb0..7f34ca48f7f 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -126,7 +126,11 @@ std::unique_ptr LocalObjectStorage::writeObject( /// NO throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files"); LOG_TEST(log, "Write object: {}", 
object.remote_path); + + /// Unlike real blob storage, in local fs we cannot create a file with non-existing prefix. + /// So let's create it. fs::create_directories(fs::path(object.remote_path).parent_path()); + return std::make_unique(object.remote_path, buf_size); } @@ -185,6 +189,8 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const { + /// Unlike real object storage, existance of a prefix path can be checked by + /// just checking existence of this prefix directly, so simple exists is enough here. return exists(StoredObject(path)); } From 33788250b1f74384661cd241e2badef82c8fdbf6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 18:07:19 +0800 Subject: [PATCH 065/985] Update test.py --- tests/integration/test_attach_backup_from_s3_plain/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 3a0fa70a715..c2f8936b82c 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -26,9 +26,8 @@ s3_disk_def = """disk(type=s3_plain, access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = ( - "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain', path = '/local_plain/{}/'" -) +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/');" + @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", @@ -67,7 +66,6 @@ local_disk_def = ( ), ], ) - def test_attach_part( table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part ): From 58a53b42acb3b25a41e8529186db9df0d4387f77 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Feb 2024 14:31:35 +0100 Subject: [PATCH 066/985] Set max_entries to min(max_parallel_replicas, all available replicas) --- src/Client/HedgedConnectionsFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 16a03a696bd..703cc1f8821 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -86,7 +86,7 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode if (max_parallel_replicas == 0) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = max_parallel_replicas; + max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } } From 98b27fd45fbe1109442c2313181ca4e8435e2024 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 23:00:27 +0800 Subject: [PATCH 067/985] Fix style check --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 7f34ca48f7f..eba57969580 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -189,7 +189,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith bool 
LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const { - /// Unlike real object storage, existance of a prefix path can be checked by + /// Unlike real object storage, existence of a prefix path can be checked by /// just checking existence of this prefix directly, so simple exists is enough here. return exists(StoredObject(path)); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b2a9ab8fdc3..eec3a5914fc 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -48,6 +48,7 @@ namespace ErrorCodes { extern const int S3_ERROR; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } namespace From 416638461fe832673252445d8fabb3fe554eed49 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 27 Feb 2024 15:02:13 +0000 Subject: [PATCH 068/985] Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike --- src/Functions/array/FunctionArrayMapped.h | 4 ++-- .../03002_map_array_functions_with_low_cardinality.reference | 1 + .../03002_map_array_functions_with_low_cardinality.sql | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference create mode 100644 tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 49ed9d495e2..136d3481771 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -355,7 +355,7 @@ public: { arrays.emplace_back( column_tuple->getColumnPtr(j), - recursiveRemoveLowCardinality(type_tuple.getElement(j)), + type_tuple.getElement(j), array_with_type_and_name.name + "." 
+ tuple_names[j]); } } @@ -363,7 +363,7 @@ public: { arrays.emplace_back( column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), + array_type->getNestedType(), array_with_type_and_name.name); } diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql new file mode 100644 index 00000000000..8240a8f93f5 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql @@ -0,0 +1,2 @@ +SELECT mapContainsKeyLike(map('aa', toLowCardinality(1), 'bb', toLowCardinality(2)), toLowCardinality('a%')); + From 5771e739f0e65baae69f1e7abd42495d5fbc5488 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Feb 2024 23:11:29 +0800 Subject: [PATCH 069/985] Update ReadSettings.h --- src/IO/ReadSettings.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 2c79735317d..846fcd668f0 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -98,7 +98,6 @@ struct ReadSettings bool enable_filesystem_cache = true; bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; - /// Don't populate cache when the read is not part of query execution (e.g. background thread). bool force_read_through_cache_merges = false; size_t filesystem_cache_segments_batch_size = 20; From 1eba06dc113881b2845d36a7d3a4703ad64659d7 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 27 Feb 2024 23:12:41 +0800 Subject: [PATCH 070/985] Update 02241_filesystem_cache_on_write_operations.reference --- .../02241_filesystem_cache_on_write_operations.reference | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index c03b928684b..53566a18edc 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -95,13 +95,7 @@ INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 SYSTEM START MERGES test_02241 -SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' -85146 -SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' OPTIMIZE TABLE test_02241 FINAL -SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' -251542 -SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 From ffd69e0e127f64cf90a41d7b710c375ced13f092 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 23:22:04 +0800 Subject: [PATCH 071/985] Move setting to merge-tree level --- src/Core/ServerSettings.h | 3 --- src/Interpreters/Context.cpp | 1 - 
src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 3 ++- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../config.d/force_read_through_cache_for_merges.xml | 4 +++- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 0283b98638f..0063b3a2bd6 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -102,9 +102,6 @@ namespace DB M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \ M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ - \ - M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ - \ M(Seconds, keep_alive_timeout, DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, "The number of seconds that ClickHouse waits for incoming requests before closing the connection.", 0) \ M(Seconds, replicated_fetches_http_connection_timeout, 0, "HTTP connection timeout for part fetch requests. Inherited from default profile `http_connection_timeout` if not set explicitly.", 0) \ M(Seconds, replicated_fetches_http_send_timeout, 0, "HTTP send timeout for part fetch requests. Inherited from default profile `http_send_timeout` if not set explicitly.", 0) \ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a974eaca067..55a4df10206 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5079,7 +5079,6 @@ ReadSettings Context::getReadSettings() const res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; - res.force_read_through_cache_merges = getServerSettings().force_read_through_cache_for_merges; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index e375e8b0a9f..6b0c5ccb59a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -151,7 +151,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const auto & context = storage.getContext(); ReadSettings read_settings = context->getReadSettings(); - read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = !read_settings.force_read_through_cache_merges; + read_settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = !storage.getSettings()->force_read_through_cache_for_merges; + /// It does not make sense to use pthread_threadpool for background merges/mutations /// And also to preserve backward compatibility read_settings.local_fs_method = LocalFSReadMethod::pread; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index b64632b6139..9cb74e76dd5 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ 
b/src/Storages/MergeTree/MergeTreeSettings.h @@ -191,6 +191,7 @@ struct Settings; M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ M(Bool, cache_populated_by_fetch, false, "Only available in ClickHouse Cloud", 0) \ + M(Bool, force_read_through_cache_for_merges, false, "Force read-through filesystem cache for merges", 0) \ M(Bool, allow_experimental_block_number_column, false, "Enable persisting column _block_number for each row.", 0) \ M(Bool, allow_experimental_replacing_merge_with_cleanup, false, "Allow experimental CLEANUP merges for ReplacingMergeTree with is_deleted column.", 0) \ \ diff --git a/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml b/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml index bb2a6e850a4..23d3fdea800 100644 --- a/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml +++ b/tests/integration/test_filesystem_cache/config.d/force_read_through_cache_for_merges.xml @@ -1,3 +1,5 @@ - 1 + + 1 + From 524a2ca72decc124ef1e38b79843c2388cceb0bb Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 27 Feb 2024 19:17:34 +0100 Subject: [PATCH 072/985] WIP on createForShard --- .../OptimizeShardingKeyRewriteInVisitor.cpp | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 3a592c0fe55..8aca28a90ef 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -5,6 +5,12 @@ #include #include #include +#include "Analyzer/ColumnNode.h" +#include "Analyzer/ConstantNode.h" +#include "Analyzer/FunctionNode.h" +#include "Analyzer/IQueryTreeNode.h" +#include "Analyzer/InDepthQueryTreeVisitor.h" +#include "DataTypes/IDataType.h" namespace { @@ -119,4 +125,42 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d } } + +class OptimizeShardingKeyRewriteIn : InDepthQueryTreeVisitorWithContext +{ +public: + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; + + void enterImpl(QueryTreeNodePtr & node) + { + auto * function_node = node->as(); + if (!function_node || function_node->getFunctionName() != "in") + return; + + auto & arguments = function_node->getArguments().getNodes(); + auto * column = arguments[0]->as(); + if (!column) + return; + + if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(column->getColumnName())) + return; + + if (auto * constant = arguments[1]->as()) + { + if (isTuple(constant->getResultType())) + { + auto & tuple = constant->getValue().get(); + std::erase_if(tuple, [&](auto & child) + { + return tuple.size() > 1 && !shardContains(child, name, data); + }); + } + } + } + + OptimizeShardingKeyRewriteInMatcher::Data data; +}; + + } From cb8390e9c8672bcdead0108be75021d6c6f21331 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 28 Feb 2024 13:32:43 +0800 Subject: [PATCH 073/985] Fix build --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index f64c42c1403..d0c2c9ac4f4 100644 --- 
a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -16,10 +16,10 @@ #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include -#include #include #endif #include +#include #include #include From 0de2d766fa971f54eff40641e16ed6857e1ece5f Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 28 Feb 2024 15:30:06 +0100 Subject: [PATCH 074/985] WIP on different JSONs on shards --- src/Analyzer/IdentifierNode.cpp | 10 +- src/Analyzer/IdentifierNode.h | 6 ++ src/DataTypes/ObjectUtils.cpp | 75 ++++++++++++- src/DataTypes/ObjectUtils.h | 11 ++ .../ClusterProxy/SelectStreamFactory.cpp | 79 +++++++++++++- .../ClusterProxy/SelectStreamFactory.h | 26 +++++ .../ClusterProxy/executeQuery.cpp | 101 ++++++++++++------ src/Interpreters/ClusterProxy/executeQuery.h | 2 - .../OptimizeShardingKeyRewriteInVisitor.cpp | 35 ++++-- .../OptimizeShardingKeyRewriteInVisitor.h | 3 + src/Processors/QueryPlan/ReadFromRemote.cpp | 10 +- src/Storages/StorageDistributed.cpp | 32 +++--- 12 files changed, 325 insertions(+), 65 deletions(-) diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp index 88b3daacb12..7e4d4c02a4c 100644 --- a/src/Analyzer/IdentifierNode.cpp +++ b/src/Analyzer/IdentifierNode.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -56,13 +57,18 @@ void IdentifierNode::updateTreeHashImpl(HashState & state) const QueryTreeNodePtr IdentifierNode::cloneImpl() const { - return std::make_shared(identifier); + auto result = std::make_shared(identifier); + result->use_parts_for_to_ast = use_parts_for_to_ast; + return result; } ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const { auto identifier_parts = identifier.getParts(); - return std::make_shared(std::move(identifier_parts)); + if (use_parts_for_to_ast) + return std::make_shared(std::move(identifier_parts)); + else + return std::make_shared(identifier.getFullName()); } } diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h index 872bb14d512..3bc37b4c69d 100644 --- a/src/Analyzer/IdentifierNode.h +++ b/src/Analyzer/IdentifierNode.h @@ -52,6 +52,11 @@ public: void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; + void useFullNameInToAST() + { + use_parts_for_to_ast = false; + } + protected: bool isEqualImpl(const IQueryTreeNode & rhs) const override; @@ -64,6 +69,7 @@ protected: private: Identifier identifier; std::optional table_expression_modifiers; + bool use_parts_for_to_ast = false; static constexpr size_t children_size = 0; }; diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 23d29136c85..01ba50d90f3 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -20,6 +21,16 @@ #include #include #include +#include "Analyzer/ConstantNode.h" +#include "Analyzer/FunctionNode.h" +#include "Analyzer/IQueryTreeNode.h" +#include "Analyzer/Identifier.h" +#include "Analyzer/IdentifierNode.h" +#include "Analyzer/QueryNode.h" +#include "Analyzer/Utils.h" +#include +#include +#include "Common/logger_useful.h" namespace DB @@ -888,10 +899,10 @@ static void addConstantToWithClause(const ASTPtr & query, const String & column_ /// @expected_columns and @available_columns contain descriptions /// of extended Object columns. 
-void replaceMissedSubcolumnsByConstants( +NamesAndTypes calculateMissedSubcolumns( const ColumnsDescription & expected_columns, - const ColumnsDescription & available_columns, - ASTPtr query) + const ColumnsDescription & available_columns +) { NamesAndTypes missed_names_types; @@ -928,6 +939,18 @@ void replaceMissedSubcolumnsByConstants( [](const auto & lhs, const auto & rhs) { return lhs.name < rhs.name; }); } + return missed_names_types; +} + +/// @expected_columns and @available_columns contain descriptions +/// of extended Object columns. +void replaceMissedSubcolumnsByConstants( + const ColumnsDescription & expected_columns, + const ColumnsDescription & available_columns, + ASTPtr query) +{ + NamesAndTypes missed_names_types = calculateMissedSubcolumns(expected_columns, available_columns); + if (missed_names_types.empty()) return; @@ -940,6 +963,52 @@ void replaceMissedSubcolumnsByConstants( addConstantToWithClause(query, name, type); } +/// @expected_columns and @available_columns contain descriptions +/// of extended Object columns. +void replaceMissedSubcolumnsByConstants( + const ColumnsDescription & expected_columns, + const ColumnsDescription & available_columns, + QueryTreeNodePtr & query, + const ContextPtr & context [[maybe_unused]]) +{ + NamesAndTypes missed_names_types = calculateMissedSubcolumns(expected_columns, available_columns); + + if (missed_names_types.empty()) + return; + + auto * query_node = query->as(); + if (!query_node) + return; + + auto table_expression = extractLeftTableExpression(query_node->getJoinTree()); + + auto & with_nodes = query_node->getWith().getNodes(); + + std::unordered_map column_name_to_node; + for (const auto & [name, type] : missed_names_types) + { + auto constant = std::make_shared(type->getDefault(), type); + constant->setAlias(table_expression->getAlias() + name); + // auto materialize = std::make_shared("materialize"); + + // auto function = FunctionFactory::instance().get("materialize", context); + // materialize->getArguments().getNodes() = { constant }; + // materialize->resolveAsFunction(function->build(materialize->getArgumentColumns())); + // materialize->setAlias(name); + + with_nodes.push_back(constant); + + auto id = std::make_shared(Identifier(table_expression->getAlias() + name)); + id->useFullNameInToAST(); + column_name_to_node[name] = id; + LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Name {} Expression\n{}", name, column_name_to_node[name]->dumpTree()); + } + + LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Table expression\n{} ", table_expression->dumpTree()); + replaceColumns(query, table_expression, column_name_to_node); + LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Result:\n{} ", query->dumpTree()); +} + Field FieldVisitorReplaceScalars::operator()(const Array & x) const { if (num_dimensions_to_keep == 0) diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 2bfcaae09ca..f4a8abe8abf 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -3,6 +3,8 @@ #include #include #include +#include "Analyzer/IQueryTreeNode.h" +#include "Interpreters/Context_fwd.h" #include #include #include @@ -14,6 +16,9 @@ namespace DB struct StorageSnapshot; using StorageSnapshotPtr = std::shared_ptr; +class IQueryTreeNode; +using QueryTreeNodePtr = std::shared_ptr; + /// Returns number of dimensions in Array type. 0 if type is not array. 
size_t getNumberOfDimensions(const IDataType & type); @@ -97,6 +102,12 @@ void replaceMissedSubcolumnsByConstants( const ColumnsDescription & available_columns, ASTPtr query); +void replaceMissedSubcolumnsByConstants( + const ColumnsDescription & expected_columns, + const ColumnsDescription & available_columns, + QueryTreeNodePtr & query, + const ContextPtr & context); + /// Visitor that keeps @num_dimensions_to_keep dimensions in arrays /// and replaces all scalars or nested arrays to @replacement at that level. class FieldVisitorReplaceScalars : public StaticVisitor diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index f0592735caf..5167ffc0e27 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -5,6 +5,10 @@ #include #include #include +#include "Analyzer/IQueryTreeNode.h" +#include "Interpreters/InterpreterSelectQueryAnalyzer.h" +#include "Interpreters/SelectQueryOptions.h" +#include "Planner/Utils.h" #include #include #include @@ -124,18 +128,55 @@ void SelectStreamFactory::createForShard( if (it != objects_by_shard.end()) replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, query_ast); + createForShardImpl( + shard_info, + query_ast, + main_table, + table_func_ptr, + std::move(context), + local_plans, + remote_shards, + shard_count, + parallel_replicas_enabled, + std::move(shard_filter_generator)); +} + +void SelectStreamFactory::createForShardImpl( + const Cluster::ShardInfo & shard_info, + const ASTPtr & query_ast, + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards, + UInt32 shard_count, + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator) +{ auto emplace_local_stream = [&]() { + Block shard_header; + if (context->getSettingsRef().allow_experimental_analyzer) + shard_header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_ast, context, SelectQueryOptions(processed_stage).analyze()); + else + shard_header = header; + local_plans.emplace_back(createLocalPlan( - query_ast, header, context, processed_stage, shard_info.shard_num, shard_count)); + query_ast, shard_header, context, processed_stage, shard_info.shard_num, shard_count)); }; auto emplace_remote_stream = [&](bool lazy = false, time_t local_delay = 0) { + Block shard_header; + if (context->getSettingsRef().allow_experimental_analyzer) + shard_header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_ast, context, SelectQueryOptions(processed_stage).analyze()); + else + shard_header = header; + remote_shards.emplace_back(Shard{ .query = query_ast, .main_table = main_table, - .header = header, + .header = shard_header, .shard_info = shard_info, .lazy = lazy, .local_delay = local_delay, @@ -243,6 +284,40 @@ void SelectStreamFactory::createForShard( emplace_remote_stream(); } +void SelectStreamFactory::createForShard( + const Cluster::ShardInfo & shard_info, + const QueryTreeNodePtr & query_tree, + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards, + UInt32 shard_count, + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator) +{ + + auto it = objects_by_shard.find(shard_info.shard_num); + QueryTreeNodePtr modified_query = query_tree; + if (it != objects_by_shard.end()) + 
replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, modified_query, context); + + auto query_ast = queryNodeToDistributedSelectQuery(modified_query); + + createForShardImpl( + shard_info, + query_ast, + main_table, + table_func_ptr, + std::move(context), + local_plans, + remote_shards, + shard_count, + parallel_replicas_enabled, + std::move(shard_filter_generator)); + +} + } } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 9993ea7028d..45d6ea14c01 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -7,6 +7,7 @@ #include #include #include +#include "Analyzer/IQueryTreeNode.h" namespace DB { @@ -83,10 +84,35 @@ public: bool parallel_replicas_enabled, AdditionalShardFilterGenerator shard_filter_generator); + void createForShard( + const Cluster::ShardInfo & shard_info, + const QueryTreeNodePtr & query_tree, + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards, + UInt32 shard_count, + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator); + const Block header; const ColumnsDescriptionByShardNum objects_by_shard; const StorageSnapshotPtr storage_snapshot; QueryProcessingStage::Enum processed_stage; + +private: + void createForShardImpl( + const Cluster::ShardInfo & shard_info, + const ASTPtr & query_ast, + const StorageID & main_table, + const ASTPtr & table_func_ptr, + ContextPtr context, + std::vector & local_plans, + Shards & remote_shards, + UInt32 shard_count, + bool parallel_replicas_enabled, + AdditionalShardFilterGenerator shard_filter_generator); }; } diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 6cdff939af1..07ef7aa6c96 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -204,12 +204,10 @@ void executeQuery( const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, LoggerPtr log, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & query_info, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, - const ClusterPtr & not_optimized_cluster, const DistributedSettings & distributed_settings, AdditionalShardFilterGenerator shard_filter_generator) { @@ -218,6 +216,8 @@ void executeQuery( if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); + const ClusterPtr & not_optimized_cluster = query_info.cluster; + std::vector plans; SelectStreamFactory::Shards remote_shards; @@ -237,40 +237,81 @@ void executeQuery( new_context->increaseDistributedDepth(); const size_t shards = cluster->getShardCount(); - for (size_t i = 0, s = cluster->getShardsInfo().size(); i < s; ++i) + + if (context->getSettingsRef().allow_experimental_analyzer) { - const auto & shard_info = cluster->getShardsInfo()[i]; - - ASTPtr query_ast_for_shard = query_ast->clone(); - if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + for (size_t i = 0, s = cluster->getShardsInfo().size(); i < s; ++i) { - OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ - sharding_key_expr, - 
sharding_key_expr->getSampleBlock().getByPosition(0).type, - sharding_key_column_name, + const auto & shard_info = cluster->getShardsInfo()[i]; + + auto query_for_shard = query_info.query_tree->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + { + OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ + sharding_key_expr, + sharding_key_expr->getSampleBlock().getByPosition(0).type, + sharding_key_column_name, + shard_info, + not_optimized_cluster->getSlotToShard(), + }; + optimizeShardingKeyRewriteIn(query_for_shard, std::move(visitor_data), new_context); + } + + // decide for each shard if parallel reading from replicas should be enabled + // according to settings and number of replicas declared per shard + const auto & addresses = cluster->getShardsAddresses().at(i); + bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas(); + + stream_factory.createForShard( shard_info, - not_optimized_cluster->getSlotToShard(), - }; - OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); - visitor.visit(query_ast_for_shard); + query_for_shard, + main_table, + table_func_ptr, + new_context, + plans, + remote_shards, + static_cast(shards), + parallel_replicas_enabled, + shard_filter_generator); } + } + else + { + for (size_t i = 0, s = cluster->getShardsInfo().size(); i < s; ++i) + { + const auto & shard_info = cluster->getShardsInfo()[i]; - // decide for each shard if parallel reading from replicas should be enabled - // according to settings and number of replicas declared per shard - const auto & addresses = cluster->getShardsAddresses().at(i); - bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas(); + ASTPtr query_ast_for_shard = query_info.query->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + { + OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ + sharding_key_expr, + sharding_key_expr->getSampleBlock().getByPosition(0).type, + sharding_key_column_name, + shard_info, + not_optimized_cluster->getSlotToShard(), + }; + OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); + visitor.visit(query_ast_for_shard); + } - stream_factory.createForShard( - shard_info, - query_ast_for_shard, - main_table, - table_func_ptr, - new_context, - plans, - remote_shards, - static_cast(shards), - parallel_replicas_enabled, - shard_filter_generator); + // decide for each shard if parallel reading from replicas should be enabled + // according to settings and number of replicas declared per shard + const auto & addresses = cluster->getShardsAddresses().at(i); + bool parallel_replicas_enabled = addresses.size() > 1 && context->canUseTaskBasedParallelReplicas(); + + stream_factory.createForShard( + shard_info, + query_ast_for_shard, + main_table, + table_func_ptr, + new_context, + plans, + remote_shards, + static_cast(shards), + parallel_replicas_enabled, + shard_filter_generator); + } } if (!remote_shards.empty()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index bbc3c6c9e49..8f6f6300c7b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -58,12 +58,10 @@ void executeQuery( const ASTPtr & table_func_ptr, SelectStreamFactory & stream_factory, LoggerPtr log, - const ASTPtr & query_ast, ContextPtr context, const SelectQueryInfo & 
query_info, const ExpressionActionsPtr & sharding_key_expr, const std::string & sharding_key_column_name, - const ClusterPtr & not_optimized_cluster, const DistributedSettings & distributed_settings, AdditionalShardFilterGenerator shard_filter_generator); diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 8aca28a90ef..42c6e63da01 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -11,6 +12,7 @@ #include "Analyzer/IQueryTreeNode.h" #include "Analyzer/InDepthQueryTreeVisitor.h" #include "DataTypes/IDataType.h" +#include "Interpreters/Context_fwd.h" namespace { @@ -126,11 +128,15 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d } -class OptimizeShardingKeyRewriteIn : InDepthQueryTreeVisitorWithContext +class OptimizeShardingKeyRewriteIn : public InDepthQueryTreeVisitorWithContext { public: using Base = InDepthQueryTreeVisitorWithContext; - using Base::Base; + + OptimizeShardingKeyRewriteIn(OptimizeShardingKeyRewriteInVisitor::Data data_, ContextPtr context) + : Base(std::move(context)) + , data(std::move(data_)) + {} void enterImpl(QueryTreeNodePtr & node) { @@ -143,6 +149,8 @@ public: if (!column) return; + auto name = column->getColumnName(); + if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(column->getColumnName())) return; @@ -150,17 +158,30 @@ public: { if (isTuple(constant->getResultType())) { - auto & tuple = constant->getValue().get(); - std::erase_if(tuple, [&](auto & child) + const auto & tuple = constant->getValue().get(); + Tuple new_tuple; + new_tuple.reserve(tuple.size()); + + for (const auto & child : tuple) { - return tuple.size() > 1 && !shardContains(child, name, data); - }); + if (shardContains(child, name, data)) + new_tuple.push_back(child); + } + + if (new_tuple.empty()) + new_tuple.push_back(tuple.back()); + node = std::make_shared(new_tuple); } } } - OptimizeShardingKeyRewriteInMatcher::Data data; + OptimizeShardingKeyRewriteInVisitor::Data data; }; +void optimizeShardingKeyRewriteIn(QueryTreeNodePtr & node, OptimizeShardingKeyRewriteInVisitor::Data data, ContextPtr context) +{ + OptimizeShardingKeyRewriteIn visitor(std::move(data), std::move(context)); + visitor.visit(node); +} } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h index d546db40df7..d202609160b 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.h @@ -2,6 +2,7 @@ #include #include +#include "Analyzer/IQueryTreeNode.h" namespace DB { @@ -44,4 +45,6 @@ struct OptimizeShardingKeyRewriteInMatcher using OptimizeShardingKeyRewriteInVisitor = InDepthNodeVisitor; +void optimizeShardingKeyRewriteIn(QueryTreeNodePtr & node, OptimizeShardingKeyRewriteInVisitor::Data data, ContextPtr context); + } diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 93c73a66b78..022c4f699f2 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -216,7 +216,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream }; pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes)); - addConvertingActions(pipes.back(), output_stream->header); + 
addConvertingActions(pipes.back(), shard.header); } void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard) @@ -281,7 +281,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact auto remote_query_executor = std::make_shared( shard.shard_info.pool, query_string, - output_stream->header, + shard.header, context, throttler, scalars, @@ -297,7 +297,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), output_stream->header); + addConvertingActions(pipes.back(), shard.header); } } else @@ -305,7 +305,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact const String query_string = formattedAST(shard.query); auto remote_query_executor = std::make_shared( - shard.shard_info.pool, query_string, output_stream->header, context, throttler, scalars, external_tables, stage); + shard.shard_info.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage); remote_query_executor->setLogger(log); if (context->canUseTaskBasedParallelReplicas()) @@ -326,7 +326,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), output_stream->header); + addConvertingActions(pipes.back(), shard.header); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 92e7dcdf4f2..34ab21a4751 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -30,6 +30,7 @@ #include #include #include +#include "Analyzer/IQueryTreeNode.h" #include #include @@ -813,7 +814,8 @@ void StorageDistributed::read( const size_t /*num_streams*/) { Block header; - ASTPtr query_ast; + + SelectQueryInfo modified_query_info = query_info; if (local_context->getSettingsRef().allow_experimental_analyzer) { @@ -821,7 +823,7 @@ void StorageDistributed::read( if (!remote_table_function_ptr) remote_storage_id = StorageID{remote_database, remote_table}; - auto query_tree_distributed = buildQueryTreeDistributed(query_info, + auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info, storage_snapshot, remote_storage_id, remote_table_function_ptr); @@ -831,20 +833,24 @@ void StorageDistributed::read( */ for (auto & column : header) column.column = column.column->convertToFullColumnIfConst(); - query_ast = queryNodeToDistributedSelectQuery(query_tree_distributed); + modified_query_info.query = queryNodeToDistributedSelectQuery(query_tree_distributed); + + modified_query_info.query_tree = std::move(query_tree_distributed); } else { - header = InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - query_ast = query_info.query; + header = InterpreterSelectQuery(modified_query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } - const auto & modified_query_ast = ClusterProxy::rewriteSelectQuery( - local_context, query_ast, - remote_database, remote_table, remote_table_function_ptr); + if (!local_context->getSettingsRef().allow_experimental_analyzer) + { + modified_query_info.query = ClusterProxy::rewriteSelectQuery( + 
local_context, modified_query_info.query, + remote_database, remote_table, remote_table_function_ptr); + } /// Return directly (with correct header) if no shard to query. - if (query_info.getCluster()->getShardsInfo().empty()) + if (modified_query_info.getCluster()->getShardsInfo().empty()) { if (local_context->getSettingsRef().allow_experimental_analyzer) return; @@ -872,7 +878,7 @@ void StorageDistributed::read( const auto & settings = local_context->getSettingsRef(); ClusterProxy::AdditionalShardFilterGenerator additional_shard_filter_generator; - if (local_context->canUseParallelReplicasCustomKey(*query_info.getCluster())) + if (local_context->canUseParallelReplicasCustomKey(*modified_query_info.getCluster())) { if (auto custom_key_ast = parseCustomKeyForTable(settings.parallel_replicas_custom_key, *local_context)) { @@ -881,7 +887,7 @@ void StorageDistributed::read( column_description = this->getInMemoryMetadataPtr()->columns, custom_key_type = settings.parallel_replicas_custom_key_filter_type.value, context = local_context, - replica_count = query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr + replica_count = modified_query_info.getCluster()->getShardsInfo().front().per_replica_pools.size()](uint64_t replica_num) -> ASTPtr { return getCustomKeyFilterForParallelReplica( replica_count, replica_num - 1, my_custom_key_ast, custom_key_type, column_description, context); @@ -897,12 +903,10 @@ void StorageDistributed::read( remote_table_function_ptr, select_stream_factory, log, - modified_query_ast, local_context, - query_info, + modified_query_info, sharding_key_expr, sharding_key_column_name, - query_info.cluster, distributed_settings, additional_shard_filter_generator); From 8bf7c2c5971afc22dda32f9f4ad453ac481f2359 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 28 Feb 2024 15:40:42 +0100 Subject: [PATCH 075/985] Use output header --- src/Processors/QueryPlan/ReadFromRemote.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 022c4f699f2..fde2313bc15 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -216,7 +216,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream }; pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes)); - addConvertingActions(pipes.back(), shard.header); + addConvertingActions(pipes.back(), output_stream->header); } void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard) @@ -297,7 +297,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), shard.header); + addConvertingActions(pipes.back(), output_stream->header); } } else @@ -326,7 +326,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addConvertingActions(pipes.back(), shard.header); + addConvertingActions(pipes.back(), output_stream->header); } } From d2ea882bd8105f5d2e173a6670bf23b2917b3190 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Feb 2024 21:26:19 +0000 Subject: 
[PATCH 076/985] Fix deadlock in parallel parsing when lots of rows are skipped due to errors --- .../Formats/Impl/ParallelParsingInputFormat.cpp | 4 +++- .../03001_parallel_parsing_deadlock.reference | 0 .../0_stateless/03001_parallel_parsing_deadlock.sh | 12 ++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference create mode 100755 tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 8b6969bbfcc..447adb1ed48 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -224,7 +224,9 @@ Chunk ParallelParsingInputFormat::read() /// skipped all rows. For example, it can happen while using settings /// input_format_allow_errors_num/input_format_allow_errors_ratio /// and this segment contained only rows with errors. - /// Process the next unit. + /// Return this empty unit back to segmentator and process the next unit. + unit->status = READY_TO_INSERT; + segmentator_condvar.notify_all(); ++reader_ticket_number; unit = &processing_units[reader_ticket_number % processing_units.size()]; } diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh new file mode 100755 index 00000000000..1bf21dfc53b --- /dev/null +++ b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-cpu-aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -q "select number > 1000000 ? 
'error' : toString(number) from numbers(2000000) format CSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file($DATA_FILE, CSV, 'x UInt64') format Null settings input_format_allow_errors_ratio=1" +rm $DATA_FILE + From 974ba7364f193838f735a9233c6dec4298172542 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 29 Feb 2024 00:55:17 +0100 Subject: [PATCH 077/985] better --- src/Disks/ObjectStorages/S3/diskSettings.cpp | 16 +++++++++++++--- src/IO/S3/Client.cpp | 2 ++ src/IO/S3/URI.cpp | 2 +- src/IO/WriteBufferFromS3.h | 2 +- src/Storages/StorageS3.cpp | 4 ++++ 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index b8688cd3de6..10172805f06 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -1,5 +1,6 @@ #include -#include "IO/S3/Client.h" +#include +#include #if USE_AWS_S3 @@ -10,7 +11,7 @@ #include #include #include -#include "Disks/DiskFactory.h" +#include #include #include @@ -25,6 +26,11 @@ namespace DB { +namespace ErrorCodes +{ +extern const int NO_ELEMENTS_IN_CONFIG; +} + std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { const Settings & settings = context->getSettingsRef(); @@ -47,11 +53,15 @@ std::unique_ptr getClient( const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); - String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); + const String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint")); S3::URI uri(endpoint); if (!uri.key.ends_with('/')) uri.key.push_back('/'); + if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region")) + throw Exception( + ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix); + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( config.getString(config_prefix + ".region", ""), context->getRemoteHostFilter(), diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index a75d41df3d1..4f93aba2f84 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -1,4 +1,5 @@ #include +#include #if USE_AWS_S3 @@ -965,6 +966,7 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT bool isS3ExpressEndpoint(const std::string & endpoint) { + /// On one hand this check isn't 100% reliable, on the other - all it will change is whether we attach checksums to the requests. return endpoint.contains("s3express"); } } diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 062d3b80850..027cb624ed5 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -122,7 +122,7 @@ URI::URI(const std::string & uri_) "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); - if (name == COS || name == COSN) + if (name == COS) storage_name = COSN; else storage_name = name; diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 148cd27f854..59f4e19e15b 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -100,7 +100,7 @@ private: /// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. 
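A minimal sketch (illustration only, not from the patch) of the idea behind the per-part checksums that S3 Express "directory buckets" require: each UploadPart request gets a CRC32 computed over its body, and the Base64-encoded value is remembered (in the `multipart_checksums` deque declared just below) so it can be attached to the corresponding part when the multipart upload is completed. The AWS SDK helpers match the calls used later in this series; error handling is omitted.

```cpp
#include <aws/core/utils/HashingUtils.h>
#include <aws/s3/model/UploadPartRequest.h>
#include <deque>
#include <string>

std::deque<std::string> multipart_checksums; // filled only when per-part checksums are enabled

void rememberPartChecksum(Aws::S3::Model::UploadPartRequest & req)
{
    // CRC32 over the part body, Base64-encoded as S3 expects it.
    auto checksum = Aws::Utils::HashingUtils::Base64Encode(
        Aws::Utils::HashingUtils::CalculateCRC32(*req.GetBody()));
    req.SetChecksumCRC32(checksum);
    multipart_checksums.emplace_back(checksum.c_str(), checksum.size());
}
```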
String multipart_upload_id; std::deque multipart_tags; - std::deque multipart_checksums; + std::deque multipart_checksums; // if enabled bool multipart_upload_finished = false; /// Track that prefinalize() is called only once diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 044a1ca5362..f96ff8b7eb6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -133,6 +133,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; + extern const int NO_ELEMENTS_IN_CONFIG; } @@ -1403,6 +1404,9 @@ void StorageS3::Configuration::connect(const ContextPtr & context) const Settings & global_settings = context->getGlobalContext()->getSettingsRef(); const Settings & local_settings = context->getSettingsRef(); + if (S3::isS3ExpressEndpoint(url.endpoint) && auth_settings.region.empty()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets"); + S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( auth_settings.region, context->getRemoteHostFilter(), From 37917a3ed34df22756562a04a90d3c985ca23bd8 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 29 Feb 2024 01:42:32 +0100 Subject: [PATCH 078/985] better --- src/IO/S3/Requests.h | 12 ++++++++++++ src/IO/WriteBufferFromS3.cpp | 10 +++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/IO/S3/Requests.h b/src/IO/S3/Requests.h index 6f82a0f39d3..196f074c9df 100644 --- a/src/IO/S3/Requests.h +++ b/src/IO/S3/Requests.h @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -39,6 +40,17 @@ inline void setPartChecksum(Model::CompletedPart & part, const std::string & che part.SetChecksumCRC32(checksum); } +inline void setRequestChecksum(Model::UploadPartRequest & req, const std::string & checksum) +{ + req.SetChecksumCRC32(checksum); +} + +inline std::string calculateChecksum(Model::UploadPartRequest & req) +{ + chassert(req.GetChecksumAlgorithm() == Aws::S3::Model::ChecksumAlgorithm::CRC32); + return Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateCRC32(*(req.GetBody()))); +} + template inline void setChecksumAlgorithm(R & request) { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index a162992278f..80ca96b0382 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -18,10 +18,6 @@ #include #include -#include -#include -#include - #include @@ -461,9 +457,9 @@ S3::UploadPartRequest WriteBufferFromS3::getUploadRequest(size_t part_number, Pa /// Checksums need to be provided on CompleteMultipartUpload requests, so we calculate then manually and store in multipart_checksums if (client_ptr->isS3ExpressBucket()) { - chassert(req.GetChecksumAlgorithm() == Aws::S3::Model::ChecksumAlgorithm::CRC32); - req.SetChecksumCRC32(Aws::Utils::HashingUtils::Base64Encode(Aws::Utils::HashingUtils::CalculateCRC32(*(req.GetBody())))); - multipart_checksums.push_back(req.GetChecksumCRC32()); + auto checksum = S3::RequestChecksum::calculateChecksum(req); + S3::RequestChecksum::setRequestChecksum(req, checksum); + multipart_checksums.push_back(std::move(checksum)); } return req; From 6fbd298b3d7cc06b1f11727263a25bc613f7c295 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Feb 2024 05:03:09 +0300 Subject: [PATCH 079/985] Revert "Revert "Use `MergeTree` as a default table engine"" --- src/Core/Settings.h | 2 +- 
src/Core/SettingsChangesHistory.h | 1 + tests/queries/0_stateless/02184_default_table_engine.sql | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..5f52396d3bb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -566,7 +566,7 @@ class IColumn; M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ - M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ + M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e8d013d13ec..661e7cb80da 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -133,6 +133,7 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index a984ec1b6c9..aff30eeea98 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -1,3 +1,5 @@ +SET default_table_engine = 'None'; + CREATE TABLE table_02184 (x UInt8); --{serverError 119} SET default_table_engine = 'Log'; CREATE TABLE table_02184 (x UInt8); From 0d4648b535a61561d122c87cf181434215753b35 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 29 Feb 2024 10:30:17 +0800 Subject: [PATCH 080/985] Fix clang-tidy --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index eba57969580..c0b45e1d46a 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -32,7 +32,7 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_) else description = "/"; - fs::create_directories(getCommonKeyPrefix()); + fs::create_directories(key_prefix); } bool LocalObjectStorage::exists(const StoredObject & object) const From 3188c1ebdac52efbdadb8f64a13b0c4b6f4e1acc Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 28 Feb 2024 13:51:48 +0800 Subject: [PATCH 081/985] Update test.py --- tests/integration/test_filesystem_cache/test.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index f32fa4e9823..0cb1866f8e4 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -94,12 +94,21 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): cache_state = node.query( "SELECT key, file_segment_range_begin, size FROM system.filesystem_cache WHERE size > 0 ORDER BY key, file_segment_range_begin, size" ) + keys = ( + node.query( + "SELECT distinct(key) FROM system.filesystem_cache WHERE size > 0 ORDER BY key, file_segment_range_begin, size" + ) + .strip() + .splitlines() + ) node.restart_clickhouse() - assert cache_count == int(node.query("SELECT count() FROM system.filesystem_cache")) + # < because of additional files loaded into cache on server startup. + assert cache_count <= int(node.query("SELECT count() FROM system.filesystem_cache")) + keys_set = ",".join(["'" + x + "'" for x in keys]) assert cache_state == node.query( - "SELECT key, file_segment_range_begin, size FROM system.filesystem_cache ORDER BY key, file_segment_range_begin, size" + f"SELECT key, file_segment_range_begin, size FROM system.filesystem_cache WHERE key in ({keys_set}) ORDER BY key, file_segment_range_begin, size" ) assert node.contains_in_log("Loading filesystem cache with 30 threads") From f8561b2265b924c64c60bdbc5305785c0f0b6f2e Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 29 Feb 2024 13:53:27 +0100 Subject: [PATCH 082/985] Revert "Revert "Support resource request canceling"" --- docs/en/operations/system-tables/scheduler.md | 4 + src/Common/Scheduler/ISchedulerNode.h | 2 + src/Common/Scheduler/ISchedulerQueue.h | 6 ++ src/Common/Scheduler/Nodes/FairPolicy.h | 99 ++++++++++--------- src/Common/Scheduler/Nodes/FifoQueue.h | 31 ++++-- src/Common/Scheduler/Nodes/PriorityPolicy.h | 38 ++++--- .../tests/gtest_dynamic_resource_manager.cpp | 1 - .../Nodes/tests/gtest_resource_scheduler.cpp | 63 ++++++++++++ src/Common/Scheduler/ResourceGuard.h | 9 +- src/Common/Scheduler/ResourceRequest.cpp | 13 +++ src/Common/Scheduler/ResourceRequest.h | 30 +++--- src/Common/Scheduler/SchedulerRoot.h | 32 +++--- .../System/StorageSystemScheduler.cpp | 4 + 13 files changed, 224 insertions(+), 108 deletions(-) create mode 100644 src/Common/Scheduler/ResourceRequest.cpp diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md index 953db4c28f2..c4de7f76fdc 100644 --- a/docs/en/operations/system-tables/scheduler.md +++ b/docs/en/operations/system-tables/scheduler.md @@ -26,7 +26,9 @@ priority: 0 is_active: 0 active_children: 0 
dequeued_requests: 67 +canceled_requests: 0 dequeued_cost: 4692272 +canceled_cost: 0 busy_periods: 63 vruntime: 938454.1999999989 system_vruntime: ᴺᵁᴸᴸ @@ -54,7 +56,9 @@ Columns: - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied. - `active_children` (`UInt64`) - The number of children in active state. - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node. +- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node. - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node. +- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node. - `busy_periods` (`UInt64`) - The total number of deactivations of this node. - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner. - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`. diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 804026d7bf4..20c1f4332da 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,7 +387,9 @@ public: /// Introspection std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index cbe63bd304a..532f4bf6c63 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// Cancel previously enqueued request. + /// Returns `false` and does nothing given unknown or already executed request. + /// Returns `true` if requests has been found and canceled. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. 
+ virtual bool cancelRequest(ResourceRequest * request) = 0; + /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index c0e187e6fa9..ce2bf729a04 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,56 +134,65 @@ public: std::pair dequeueRequest() override { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - - if (child_active) // Put active child back in heap after vruntime update + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; + if (heap_size == 0) + return {nullptr, false}; - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; - // Reset any difference between children on busy period end - if (heap_size == 0) - { - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 1000000000 < ns) + if (request) { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } + // SFQ fairness invariant: system vruntime equals last served request start-time + assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, heap_size > 0}; + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + } + + if (child_active) // Put active child back in heap after vruntime update + { + std::push_heap(items.begin(), items.begin() + heap_size); + } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = clock_gettime_ns(); + if (last_reset_ns + 1000000000 < 
ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 38ae902bc2f..45ed32343ff 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -39,8 +39,7 @@ public: void enqueueRequest(ResourceRequest * request) override { - std::unique_lock lock(mutex); - request->enqueue_ns = clock_gettime_ns(); + std::lock_guard lock(mutex); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -50,7 +49,7 @@ public: std::pair dequeueRequest() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (requests.empty()) return {nullptr, false}; ResourceRequest * result = requests.front(); @@ -63,9 +62,29 @@ public: return {result, !requests.empty()}; } + bool cancelRequest(ResourceRequest * request) override + { + std::lock_guard lock(mutex); + // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) + for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + { + if (*i == request) + { + requests.erase(i); + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; + } + } + return false; + } + bool isActive() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return !requests.empty(); } @@ -98,14 +117,14 @@ public: std::pair getQueueLengthAndCost() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return {requests.size(), queue_cost}; } private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; + std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 6d6b15bd063..9b4cfc37f8c 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,25 +102,31 @@ public: std::pair dequeueRequest() override { - if (items.empty()) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - - // Deactivate child if it is empty - if (!child_active) + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); if (items.empty()) - busy_periods++; - } + return {nullptr, false}; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + + // Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + if (items.empty()) + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp 
b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index 961a3b6f713..cdf09776077 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); - gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index 9fefbc02cbd..e76639a4b01 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,6 +4,7 @@ #include +#include #include using namespace DB; @@ -73,6 +74,22 @@ struct ResourceHolder } }; +struct MyRequest : public ResourceRequest +{ + std::function on_execute; + + explicit MyRequest(ResourceCost cost_, std::function on_execute_) + : ResourceRequest(cost_) + , on_execute(on_execute_) + {} + + void execute() override + { + if (on_execute) + on_execute(); + } +}; + TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } + +TEST(SchedulerRoot, Cancel) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + std::barrier sync(2); + std::thread consumer1([&] + { + std::barrier destruct_sync(2); + MyRequest request(1,[&] + { + sync.arrive_and_wait(); // (A) + EXPECT_TRUE(fc1->requests.contains(&request)); + sync.arrive_and_wait(); // (B) + request.finish(); + destruct_sync.arrive_and_wait(); // (C) + }); + a.queue->enqueueRequest(&request); + destruct_sync.arrive_and_wait(); // (C) + }); + + std::thread consumer2([&] + { + MyRequest request(1,[&] + { + FAIL() << "This request must be canceled, but instead executes"; + }); + sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately + b.queue->enqueueRequest(&request); + bool canceled = b.queue->cancelRequest(&request); + EXPECT_TRUE(canceled); + sync.arrive_and_wait(); // (B) release request of consumer1 to be finished + }); + + consumer1.join(); + consumer2.join(); + + EXPECT_TRUE(fc1->requests.empty()); +} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index dca4041b176..50f665a384b 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,8 +71,7 @@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; - if (constraint) - constraint->finishRequest(this); + ResourceRequest::finish(); } static Request & local() @@ -126,12 +125,6 @@ public: } } - /// Mark request as unsuccessful; by default request is considered to be successful - void setFailure() - { - request.successful = false; - } - ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp new file mode 100644 index 00000000000..26e8084cdfa --- /dev/null +++ b/src/Common/Scheduler/ResourceRequest.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void ResourceRequest::finish() +{ + if 
(constraint) + constraint->finishRequest(this); +} + +} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index 3d2230746f9..f3153ad382c 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,9 +14,6 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); -/// Timestamps (nanoseconds since epoch) -using ResourceNs = UInt64; - /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -31,7 +28,7 @@ using ResourceNs = UInt64; * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) request->constraint->finishRequest() is called when consumption is finished. + * 6) ResourceRequest::finish() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -39,7 +36,10 @@ using ResourceNs = UInt64; * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request cancelling is not supported yet. + * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). + * Returning false means it is too late for request to be canceled. It should be processed in a regular way. + * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen + * and step (6) MUST be omitted. */ class ResourceRequest { @@ -48,32 +48,20 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; - /// Request outcome - /// Should be filled during resource consumption - bool successful; - /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; - /// Timestamps for introspection - ResourceNs enqueue_ns; - ResourceNs execute_ns; - ResourceNs finish_ns; - explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } + /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; - successful = true; constraint = nullptr; - enqueue_ns = 0; - execute_ns = 0; - finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -83,6 +71,12 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; + + /// Stop resource consumption and notify resource scheduler. + /// Should be called when resource consumption is finished by consumer. + /// ResourceRequest should not be destructed or reset before calling to `finish()`. + /// WARNING: this function MUST not be called if request was canceled. 
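A consumer-side sketch of the lifecycle described in the comments above, assuming an `ISchedulerQueue` obtained from the resource scheduler (the new `SchedulerRoot.Cancel` unit test in this patch exercises the same pattern in full). This is an illustration, not code from the patch.

```cpp
#include <Common/Scheduler/ISchedulerQueue.h>
#include <Common/Scheduler/ResourceRequest.h>

struct MyRequest : DB::ResourceRequest
{
    using DB::ResourceRequest::ResourceRequest;

    void execute() override
    {
        // (4) Access granted: trigger the actual consumption, e.g. fulfil a promise
        // or start a job in a thread pool.
    }
};

void consume(DB::ISchedulerQueue & queue)
{
    MyRequest request(/* cost = */ 1);
    queue.enqueueRequest(&request);      // (2) hand the request over to the scheduler

    if (queue.cancelRequest(&request))   // optional: try to cancel before it is executed
        return;                          // canceled: (4)-(6) will not happen and finish() must not be called

    // ... wait until execute() has been called and the consumption (5) is over ...
    request.finish();                    // (6) notify constraints that the resource is released
}
```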
+ void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 3a23a8df834..ab3f702a422 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,22 +145,27 @@ public: std::pair dequeueRequest() override { - if (current == nullptr) // No active resources - return {nullptr, false}; + while (true) + { + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); - assert(request != nullptr); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; + if (request == nullptr) // Possible in case of request cancel, just retry + continue; + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; + } } bool isActive() override @@ -245,7 +250,6 @@ private: void execute(ResourceRequest * request) { - request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index ba07d44dbf9..633bac5d285 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,7 +30,9 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"canceled_requests", std::make_shared(), "The total number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. 
Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -93,7 +95,9 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; From 8b1a1d42daa01e946aa8102d683dbab90b447838 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Thu, 29 Feb 2024 18:07:00 +0300 Subject: [PATCH 083/985] Traverse shadow directory for system.remote_data_paths --- src/Core/Settings.h | 1 + src/Disks/IDisk.h | 5 ++++- src/Disks/ObjectStorages/DiskObjectStorage.cpp | 10 ++++++++-- src/Disks/ObjectStorages/DiskObjectStorage.h | 5 ++++- .../System/StorageSystemRemoteDataPaths.cpp | 13 +++++++++++++ 5 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d77b3a45188..7cf068d7f1f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -843,6 +843,7 @@ class IColumn; M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \ M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \ + M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 3d228850537..62b02938d1a 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -320,7 +320,10 @@ public: {} }; - virtual void getRemotePathsRecursive(const String &, std::vector &) + virtual void getRemotePathsRecursive( + const String &, + std::vector &, + const std::function & /* skip_predicate */ = {}) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 2a648f28f14..460d242d5cd 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -90,11 +90,17 @@ StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) co return metadata_storage->getStorageObjects(local_path); } -void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector & paths_map) +void DiskObjectStorage::getRemotePathsRecursive( + const String & local_path, + std::vector & paths_map, + const std::function & skip_predicate) { if (!metadata_storage->exists(local_path)) return; + if (skip_predicate && skip_predicate(local_path)) + return; + /// Protect against concurrent delition of files (for example because of a merge). 
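For illustration only: the new `skip_predicate` argument receives each local path before it is processed, and returning `true` skips that path entirely (for a directory this prunes the whole subtree, since the check happens before recursion). A simplified caller might look like the sketch below; the predicate actually added later in this patch is stricter and filters only `shadow/<backup_name>/revision.txt`.

```cpp
#include <Disks/IDisk.h>
#include <filesystem>
#include <string>

// PathsVector stands in for the vector of local/remote path entries that
// getRemotePathsRecursive fills in (its element type is unchanged by this patch).
template <typename PathsVector>
void collectShadowPaths(DB::IDisk & disk, PathsVector & paths_map)
{
    disk.getRemotePathsRecursive(
        "shadow",
        paths_map,
        [](const std::string & local_path)
        {
            // Skip bookkeeping files that carry no object storage metadata.
            return std::filesystem::path(local_path).filename() == "revision.txt";
        });
}
```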
if (metadata_storage->isFile(local_path)) { @@ -142,7 +148,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: } for (; it->isValid(); it->next()) - DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map); + DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map, skip_predicate); } } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index e1576509713..d7af656bea3 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -48,7 +48,10 @@ public: StoredObjects getStorageObjects(const String & local_path) const override; - void getRemotePathsRecursive(const String & local_path, std::vector & paths_map) override; + void getRemotePathsRecursive( + const String & local_path, + std::vector & paths_map, + const std::function & skip_predicate = {}) override; const std::string & getCacheName() const override { return object_storage->getCacheName(); } diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 87b7a84e8ba..708c1369965 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -9,6 +9,7 @@ #include #include +namespace fs = std::filesystem; namespace DB { @@ -59,6 +60,18 @@ Pipe StorageSystemRemoteDataPaths::read( std::vector remote_paths_by_local_path; disk->getRemotePathsRecursive("store", remote_paths_by_local_path); disk->getRemotePathsRecursive("data", remote_paths_by_local_path); + if (context->getSettingsRef().traverse_shadow_remote_data_paths) + disk->getRemotePathsRecursive( + "shadow", + remote_paths_by_local_path, + [](const String & local_path) + { + // `shadow/{backup_name}/revision.txt` is not an object metadata file + const auto path = fs::path(local_path); + return path.filename() == "revision.txt" && + path.parent_path().has_parent_path() && + path.parent_path().parent_path().filename() == "shadow"; + }); FileCachePtr cache; From 5641fd8ba9c4f27794367e22632365df5cdf0303 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 29 Feb 2024 16:13:05 +0100 Subject: [PATCH 084/985] Fix build after merge --- src/IO/WriteBufferFromS3TaskTracker.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromS3TaskTracker.h b/src/IO/WriteBufferFromS3TaskTracker.h index 134abbbc4c1..4061f084a76 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.h +++ b/src/IO/WriteBufferFromS3TaskTracker.h @@ -1,7 +1,7 @@ #pragma once #include "config.h" -#include +#include #include "WriteBufferFromS3.h" #include From 7632c2c33f357c1c616f734c7bf2502ccbfbd496 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 15:17:12 +0000 Subject: [PATCH 085/985] Remove non-deterministic functions in virtual columns filter --- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ src/Storages/VirtualColumnUtils.cpp | 21 +++++++++++++++++++ ...with_non_deterministic_functions.reference | 11 ++++++++++ ...lumns_with_non_deterministic_functions.sql | 6 ++++++ 4 files changed, 40 insertions(+) create mode 100644 tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference create mode 100644 tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 
8aa188cfe5c..6494ed5d844 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1082,6 +1082,8 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + if (!filter_dag) + return {}; // Generate valid expressions for filtering bool valid = true; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 33ff6e7104f..3e0ef1d7990 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -467,6 +467,23 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } +static bool isDeterministic(const ActionsDAG::Node * node) +{ + if (node->type != ActionsDAG::ActionType::FUNCTION) + return true; + + if (!node->function_base->isDeterministic()) + return false; + + for (const auto * child : node->children) + { + if (!isDeterministic(child)) + return false; + } + + return true; +} + static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, @@ -542,6 +559,10 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } } + else if (!isDeterministic(node)) + { + return nullptr; + } } if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs)) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference new file mode 100644 index 00000000000..4c9646d6ffa --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference @@ -0,0 +1,11 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql new file mode 100644 index 00000000000..9f8bc6bd3d7 --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -0,0 +1,6 @@ +create table test (number UInt64) engine=MergeTree order by number; +insert into test select * from numbers(100000000); +select ignore(number) from test where RAND() > 4292390314 limit 10; +select count() > 0 from test where RAND() > 4292390314; +drop table test; + From 09a392772d75b38e1b19ad6bd2a863168ea0de5c Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 15:34:45 +0000 Subject: [PATCH 086/985] Use isDeterministicInScopeOfQuery --- src/Storages/VirtualColumnUtils.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 3e0ef1d7990..6d66453442e 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -467,17 +467,17 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } -static bool isDeterministic(const ActionsDAG::Node * node) +static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { if (node->type != ActionsDAG::ActionType::FUNCTION) return true; - if (!node->function_base->isDeterministic()) + if (!node->function_base->isDeterministicInScopeOfQuery()) return false; for 
(const auto * child : node->children) { - if (!isDeterministic(child)) + if (!isDeterministicInScopeOfQuery(child)) return false; } @@ -559,7 +559,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } } - else if (!isDeterministic(node)) + else if (!isDeterministicInScopeOfQuery(node)) { return nullptr; } From 55053dae4459b1d1a6c05d436d1ab421a96c3934 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 29 Feb 2024 19:18:06 +0100 Subject: [PATCH 087/985] Some progress --- src/Analyzer/IdentifierNode.cpp | 10 +--- src/Analyzer/IdentifierNode.h | 6 --- src/DataTypes/ObjectUtils.cpp | 24 +++++----- src/DataTypes/ObjectUtils.h | 4 +- .../ClusterProxy/SelectStreamFactory.cpp | 10 ++-- .../ClusterProxy/SelectStreamFactory.h | 8 +++- src/Processors/QueryPlan/ReadFromRemote.cpp | 47 +++++++++++++++++++ .../test_distributed_type_object/test.py | 2 +- 8 files changed, 80 insertions(+), 31 deletions(-) diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp index 7e4d4c02a4c..88b3daacb12 100644 --- a/src/Analyzer/IdentifierNode.cpp +++ b/src/Analyzer/IdentifierNode.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -57,18 +56,13 @@ void IdentifierNode::updateTreeHashImpl(HashState & state) const QueryTreeNodePtr IdentifierNode::cloneImpl() const { - auto result = std::make_shared(identifier); - result->use_parts_for_to_ast = use_parts_for_to_ast; - return result; + return std::make_shared(identifier); } ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const { auto identifier_parts = identifier.getParts(); - if (use_parts_for_to_ast) - return std::make_shared(std::move(identifier_parts)); - else - return std::make_shared(identifier.getFullName()); + return std::make_shared(std::move(identifier_parts)); } } diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h index 3bc37b4c69d..872bb14d512 100644 --- a/src/Analyzer/IdentifierNode.h +++ b/src/Analyzer/IdentifierNode.h @@ -52,11 +52,6 @@ public: void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; - void useFullNameInToAST() - { - use_parts_for_to_ast = false; - } - protected: bool isEqualImpl(const IQueryTreeNode & rhs) const override; @@ -69,7 +64,6 @@ protected: private: Identifier identifier; std::optional table_expression_modifiers; - bool use_parts_for_to_ast = false; static constexpr size_t children_size = 0; }; diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 01ba50d90f3..47d8c5c9113 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -965,30 +965,32 @@ void replaceMissedSubcolumnsByConstants( /// @expected_columns and @available_columns contain descriptions /// of extended Object columns. 
-void replaceMissedSubcolumnsByConstants( +MissingObjectList replaceMissedSubcolumnsByConstants( const ColumnsDescription & expected_columns, const ColumnsDescription & available_columns, QueryTreeNodePtr & query, const ContextPtr & context [[maybe_unused]]) { + MissingObjectList missed_list; + NamesAndTypes missed_names_types = calculateMissedSubcolumns(expected_columns, available_columns); if (missed_names_types.empty()) - return; + return missed_list; auto * query_node = query->as(); if (!query_node) - return; + return missed_list; + + missed_list.reserve(missed_names_types.size()); auto table_expression = extractLeftTableExpression(query_node->getJoinTree()); - auto & with_nodes = query_node->getWith().getNodes(); - std::unordered_map column_name_to_node; for (const auto & [name, type] : missed_names_types) { auto constant = std::make_shared(type->getDefault(), type); - constant->setAlias(table_expression->getAlias() + name); + constant->setAlias(table_expression->getAlias() + "." + name); // auto materialize = std::make_shared("materialize"); // auto function = FunctionFactory::instance().get("materialize", context); @@ -996,17 +998,17 @@ void replaceMissedSubcolumnsByConstants( // materialize->resolveAsFunction(function->build(materialize->getArgumentColumns())); // materialize->setAlias(name); - with_nodes.push_back(constant); - - auto id = std::make_shared(Identifier(table_expression->getAlias() + name)); - id->useFullNameInToAST(); - column_name_to_node[name] = id; + column_name_to_node[name] = buildCastFunction(constant, type, context); + missed_list.push_back({ constant->getValueStringRepresentation() + "_" + constant->getResultType()->getName(), table_expression->getAlias() + "." + name }); + LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "{} -> {}", missed_list.back().first, missed_list.back().second); LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Name {} Expression\n{}", name, column_name_to_node[name]->dumpTree()); } LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Table expression\n{} ", table_expression->dumpTree()); replaceColumns(query, table_expression, column_name_to_node); LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Result:\n{} ", query->dumpTree()); + + return missed_list; } Field FieldVisitorReplaceScalars::operator()(const Array & x) const diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index f4a8abe8abf..013e525832e 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -102,7 +102,9 @@ void replaceMissedSubcolumnsByConstants( const ColumnsDescription & available_columns, ASTPtr query); -void replaceMissedSubcolumnsByConstants( +using MissingObjectList = std::vector>; + +MissingObjectList replaceMissedSubcolumnsByConstants( const ColumnsDescription & expected_columns, const ColumnsDescription & available_columns, QueryTreeNodePtr & query, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 5167ffc0e27..5bcd1ce68cb 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -151,7 +151,8 @@ void SelectStreamFactory::createForShardImpl( Shards & remote_shards, UInt32 shard_count, bool parallel_replicas_enabled, - AdditionalShardFilterGenerator shard_filter_generator) + AdditionalShardFilterGenerator shard_filter_generator, + MissingObjectList missed_list) { auto emplace_local_stream = 
[&]() { @@ -177,6 +178,7 @@ void SelectStreamFactory::createForShardImpl( .query = query_ast, .main_table = main_table, .header = shard_header, + .missing_object_list = std::move(missed_list), .shard_info = shard_info, .lazy = lazy, .local_delay = local_delay, @@ -299,8 +301,9 @@ void SelectStreamFactory::createForShard( auto it = objects_by_shard.find(shard_info.shard_num); QueryTreeNodePtr modified_query = query_tree; + MissingObjectList missed_list; if (it != objects_by_shard.end()) - replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, modified_query, context); + missed_list = replaceMissedSubcolumnsByConstants(storage_snapshot->object_columns, it->second, modified_query, context); auto query_ast = queryNodeToDistributedSelectQuery(modified_query); @@ -314,7 +317,8 @@ void SelectStreamFactory::createForShard( remote_shards, shard_count, parallel_replicas_enabled, - std::move(shard_filter_generator)); + std::move(shard_filter_generator), + std::move(missed_list)); } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 45d6ea14c01..bee7edb3c19 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -43,6 +44,8 @@ ASTPtr rewriteSelectQuery( using ColumnsDescriptionByShardNum = std::unordered_map; using AdditionalShardFilterGenerator = std::function; +using MissingObjectList = std::vector>; + class SelectStreamFactory { public: @@ -55,6 +58,8 @@ public: StorageID main_table; Block header; + MissingObjectList missing_object_list; + Cluster::ShardInfo shard_info; /// If we connect to replicas lazily. @@ -112,7 +117,8 @@ private: Shards & remote_shards, UInt32 shard_count, bool parallel_replicas_enabled, - AdditionalShardFilterGenerator shard_filter_generator); + AdditionalShardFilterGenerator shard_filter_generator, + MissingObjectList missed_list = {}); }; } diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index fde2313bc15..ac507c6d555 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include "DataTypes/ObjectUtils.h" #include #include #include @@ -31,6 +33,48 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static void addRenamingActions(Pipe & pipe, const MissingObjectList & missed_list, const Block & output_header) +{ + if (missed_list.empty()) + return; + + const auto & output_columns = output_header.getColumnsWithTypeAndName(); + std::vector indexes; + for (size_t i = 0; i < output_columns.size(); ++i) + { + bool found = false; + for (auto const & elem : missed_list) + { + if (output_columns[i].name.contains(elem.second)) + { + found = true; + break; + } + } + if (found) + indexes.push_back(i); + } + + auto dag = std::make_shared(pipe.getHeader().getColumnsWithTypeAndName()); + + for (size_t index : indexes) + { + dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs()[index], output_header.getByPosition(index).name)); + } + + // dag->addAliases(rename_to_apply); + + auto convert_actions = std::make_shared(dag); + pipe.addSimpleTransform([&](const Block & cur_header, Pipe::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header, convert_actions); + }); + + LOG_DEBUG(&Poco::Logger::get("addRenamingActions"), "EXPECTED:\n{}", 
output_header.dumpStructure()); + + LOG_DEBUG(&Poco::Logger::get("addRenamingActions"), "{}", pipe.getHeader().dumpStructure()); +} + static void addConvertingActions(Pipe & pipe, const Block & header) { if (blocksHaveEqualStructure(pipe.getHeader(), header)) @@ -216,6 +260,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream }; pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes)); + addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); addConvertingActions(pipes.back(), output_stream->header); } @@ -297,6 +342,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); addConvertingActions(pipes.back(), output_stream->header); } } @@ -326,6 +372,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); + addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); addConvertingActions(pipes.back(), output_stream->header); } } diff --git a/tests/integration/test_distributed_type_object/test.py b/tests/integration/test_distributed_type_object/test.py index b2179af8a3f..f77e0248f02 100644 --- a/tests/integration/test_distributed_type_object/test.py +++ b/tests/integration/test_distributed_type_object/test.py @@ -59,7 +59,7 @@ def test_distributed_type_object(started_cluster): ) expected = TSV("120\n") - assert TSV(node1.query("SELECT sum(data.k2 * id) FROM dist_table")) == expected + assert TSV(node1.query("SELECT sum(data.k2 * id) FROM dist_table SETTINGS optimize_arithmetic_operations_in_aggregate_functions = 0")) == expected node1.query("TRUNCATE TABLE local_table") node2.query("TRUNCATE TABLE local_table") From 41deadda359ca02528fa6ffe9ecfed09c36b364a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 29 Feb 2024 18:37:00 +0000 Subject: [PATCH 088/985] Automatic style fix --- tests/integration/test_distributed_type_object/test.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_distributed_type_object/test.py b/tests/integration/test_distributed_type_object/test.py index f77e0248f02..7e6c000cb8e 100644 --- a/tests/integration/test_distributed_type_object/test.py +++ b/tests/integration/test_distributed_type_object/test.py @@ -59,7 +59,14 @@ def test_distributed_type_object(started_cluster): ) expected = TSV("120\n") - assert TSV(node1.query("SELECT sum(data.k2 * id) FROM dist_table SETTINGS optimize_arithmetic_operations_in_aggregate_functions = 0")) == expected + assert ( + TSV( + node1.query( + "SELECT sum(data.k2 * id) FROM dist_table SETTINGS optimize_arithmetic_operations_in_aggregate_functions = 0" + ) + ) + == expected + ) node1.query("TRUNCATE TABLE local_table") node2.query("TRUNCATE TABLE local_table") From a6cebad52bf4f29984db99cd4d4aa1eb41c50895 Mon Sep 17 00:00:00 2001 From: Nataly Merezhuk Date: Thu, 29 Feb 2024 16:32:29 -0500 Subject: [PATCH 089/985] Adds note on supported PostgreSQL versions. 
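Returning to the Object-columns handling added a few patches above: a hypothetical walk-through of what the new `MissingObjectList` carries. Assume a Distributed query over an `Object` column where one shard never received the subcolumn `data.k3 UInt8`. The rewritten shard query selects a typed default constant aliased with the table alias plus the subcolumn name, and the initiator keeps one pair per missing subcolumn so that `addRenamingActions` can alias the returned column back to the name expected by the output header. The concrete names below are illustrative, derived from how the pairs are built in `replaceMissedSubcolumnsByConstants`.

```cpp
#include <string>
#include <utility>
#include <vector>

using MissingObjectList = std::vector<std::pair<std::string, std::string>>;

// One entry per subcolumn that is absent on the shard:
//   first  - name the constant column gets in the shard result, "<value>_<type>"
//   second - alias used in the rewritten shard query, "<table alias>.<subcolumn>"
MissingObjectList missing_object_list = {
    {"0_UInt8", "t.data.k3"},
};
```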
--- docs/en/engines/table-engines/integrations/postgresql.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 131df1a435b..9cc4b11243e 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -8,6 +8,10 @@ sidebar_label: PostgreSQL The PostgreSQL engine allows to perform `SELECT` and `INSERT` queries on data that is stored on a remote PostgreSQL server. +:::note +Currently, only PostgreSQL versions 12 and up are supported. +::: + ## Creating a Table {#creating-a-table} ``` sql From 0f2d47e5a444bf78ffef6b2506e50079e6bb55c9 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 1 Mar 2024 10:52:44 +0100 Subject: [PATCH 090/985] Renamed WriteBufferFromS3TaskTracker to ThreadPoolTaskTracker --- .../ThreadPoolTaskTracker.cpp} | 2 +- .../ThreadPoolTaskTracker.h} | 6 +++--- src/Disks/IO/WriteBufferFromAzureBlobStorage.h | 2 +- src/IO/WriteBufferFromS3.cpp | 2 +- src/IO/WriteBufferFromS3.h | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) rename src/{IO/WriteBufferFromS3TaskTracker.cpp => Common/ThreadPoolTaskTracker.cpp} (99%) rename src/{IO/WriteBufferFromS3TaskTracker.h => Common/ThreadPoolTaskTracker.h} (94%) diff --git a/src/IO/WriteBufferFromS3TaskTracker.cpp b/src/Common/ThreadPoolTaskTracker.cpp similarity index 99% rename from src/IO/WriteBufferFromS3TaskTracker.cpp rename to src/Common/ThreadPoolTaskTracker.cpp index e62de261fc2..10207eb6296 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.cpp +++ b/src/Common/ThreadPoolTaskTracker.cpp @@ -1,6 +1,6 @@ #include "config.h" -#include +#include "ThreadPoolTaskTracker.h" namespace ProfileEvents { diff --git a/src/IO/WriteBufferFromS3TaskTracker.h b/src/Common/ThreadPoolTaskTracker.h similarity index 94% rename from src/IO/WriteBufferFromS3TaskTracker.h rename to src/Common/ThreadPoolTaskTracker.h index 4061f084a76..d37b759a913 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.h +++ b/src/Common/ThreadPoolTaskTracker.h @@ -1,10 +1,10 @@ #pragma once #include "config.h" -#include -#include "WriteBufferFromS3.h" +#include "threadPoolCallbackRunner.h" +#include "IO/WriteBufferFromS3.h" -#include +#include "logger_useful.h" #include diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 2d11014fa2a..4897ca9a846 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace Poco diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 6fc0a35672f..510d9bef4d3 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -4,8 +4,8 @@ #include "StdIStreamFromMemory.h" #include "WriteBufferFromS3.h" -#include "WriteBufferFromS3TaskTracker.h" +#include #include #include #include diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 28754d180bf..afd8b9909c1 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include From 6143986b6d79c0262f5f7dc3052ec2a3f4cfc490 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Fri, 1 Mar 2024 14:55:02 +0300 Subject: [PATCH 091/985] Add query test --- ...raverse_shadow_system_data_paths.reference | 3 ++ ...03000_traverse_shadow_system_data_paths.sh | 34 +++++++++++++++++++ 
2 files changed, 37 insertions(+) create mode 100644 tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference create mode 100755 tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh new file mode 100755 index 00000000000..a22cb200f9a --- /dev/null +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +TABLE="03000_traverse_shadow_system_data_path_table" +BACKUP="03000_traverse_shadow_system_data_path_backup" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${TABLE} ( + id Int64, + data String +) ENGINE=MergeTree() +ORDER BY id +SETTINGS storage_policy='s3_cache';" + +${CLICKHOUSE_CLIENT} --query="INSERT INTO ${TABLE} VALUES (0, 'data');" +${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.remote_data_paths WHERE disk_name = 's3_cache'" + +${CLICKHOUSE_CLIENT} --query="ALTER TABLE ${TABLE} FREEZE WITH NAME '${BACKUP}';" +${CLICKHOUSE_CLIENT} --query="DROP TABLE ${TABLE} SYNC;" + +${CLICKHOUSE_CLIENT} --query " + SELECT count() > 0 + FROM system.remote_data_paths + WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' + SETTINGS traverse_shadow_remote_data_paths=1;" +${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" >/dev/null +${CLICKHOUSE_CLIENT} --query " + SELECT count() == 0 + FROM system.remote_data_paths + WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' + SETTINGS traverse_shadow_remote_data_paths=1;" From a7aeb4c00f106d396364bf2a21697e329d3d284d Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 1 Mar 2024 23:44:58 +0800 Subject: [PATCH 092/985] Add --now option to enable and start the service --- packages/clickhouse-server.postinstall | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/clickhouse-server.postinstall b/packages/clickhouse-server.postinstall index d3b49db758f..41d4405a790 100644 --- a/packages/clickhouse-server.postinstall +++ b/packages/clickhouse-server.postinstall @@ -36,7 +36,7 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then fi /bin/systemctl daemon-reload - /bin/systemctl enable clickhouse-server + /bin/systemctl enable --now clickhouse-server else # If you downgrading to version older than 1.1.54336 run: systemctl disable clickhouse-server if [ -x "/etc/init.d/clickhouse-server" ]; then From 3825cb3ad0d7f2296cf075648d022ef26f1e0cef Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 2 Mar 2024 15:28:45 +0000 Subject: [PATCH 093/985] expand CTE in alter modify query --- src/Interpreters/InterpreterAlterQuery.cpp | 11 +++++++++++ .../0_stateless/03002_modify_query_cte.reference | 2 ++ .../0_stateless/03002_modify_query_cte.sql | 15 +++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/03002_modify_query_cte.reference create mode 100644 tests/queries/0_stateless/03002_modify_query_cte.sql diff --git 
a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index b768593da98..7acaf95becc 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -71,11 +72,15 @@ BlockIO InterpreterAlterQuery::execute() BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) { + ASTSelectWithUnionQuery * modify_query = nullptr; + for (auto & child : alter.command_list->children) { auto * command_ast = child->as(); if (command_ast->sql_security) InterpreterCreateQuery::processSQLSecurityOption(getContext(), command_ast->sql_security->as()); + else if (command_ast->type == ASTAlterCommand::MODIFY_QUERY) + modify_query = command_ast->select->as(); } BlockIO res; @@ -123,6 +128,12 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); + if (modify_query) + { + // Expand CTE before filling default database + ApplyWithSubqueryVisitor().visit(*modify_query); + } + /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. AddDefaultDatabaseVisitor visitor(getContext(), table_id.getDatabaseName()); ASTPtr command_list_ptr = alter.command_list->ptr(); diff --git a/tests/queries/0_stateless/03002_modify_query_cte.reference b/tests/queries/0_stateless/03002_modify_query_cte.reference new file mode 100644 index 00000000000..a3d66f70f8f --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.reference @@ -0,0 +1,2 @@ +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS SELECT ts\nFROM default.table_03002 +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM\nMY_CTE diff --git a/tests/queries/0_stateless/03002_modify_query_cte.sql b/tests/queries/0_stateless/03002_modify_query_cte.sql new file mode 100644 index 00000000000..3a36ce7e7fd --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.sql @@ -0,0 +1,15 @@ + +CREATE TABLE table_03002 (ts DateTime, event_type String) ENGINE = MergeTree ORDER BY (event_type, ts); + +CREATE MATERIALIZED VIEW mv_03002 TO table_03002 AS SELECT ts FROM table_03002; + +SHOW CREATE TABLE mv_03002; + +ALTER TABLE mv_03002 MODIFY QUERY +WITH MY_CTE AS (SELECT ts FROM table_03002) +SELECT * FROM MY_CTE; + +SHOW CREATE TABLE mv_03002; + +DROP TABLE mv_03002; +DROP TABLE table_03002; From 17413ded759ebcef809e03a80284f6f805507560 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Sat, 2 Mar 2024 11:11:44 -0500 Subject: [PATCH 094/985] Update 03002_modify_query_cte.reference --- tests/queries/0_stateless/03002_modify_query_cte.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03002_modify_query_cte.reference b/tests/queries/0_stateless/03002_modify_query_cte.reference index a3d66f70f8f..50e4a7c6a07 100644 --- a/tests/queries/0_stateless/03002_modify_query_cte.reference +++ b/tests/queries/0_stateless/03002_modify_query_cte.reference @@ -1,2 +1,2 @@ CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS SELECT ts\nFROM default.table_03002 -CREATE 
MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM\nMY_CTE +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM MY_CTE From a6cb302ab54082db5650263d6417052f81f30710 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 3 Mar 2024 15:48:49 +0000 Subject: [PATCH 095/985] fix 'AddressSanitizer: stack-use-after-return' --- src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index e76639a4b01..f8196d15819 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -140,10 +140,10 @@ TEST(SchedulerRoot, Cancel) auto b = r1.addQueue("/prio/B", "2"); r1.registerResource(); + std::barrier destruct_sync(2); std::barrier sync(2); std::thread consumer1([&] { - std::barrier destruct_sync(2); MyRequest request(1,[&] { sync.arrive_and_wait(); // (A) From 77fe221665ac8610e5ae42f547771e1877793ad0 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Sun, 3 Mar 2024 14:25:25 -0700 Subject: [PATCH 096/985] Adds undocumented rand functions. Prettifies markdown. --- .../functions/random-functions.md | 277 +++++++++++++----- 1 file changed, 206 insertions(+), 71 deletions(-) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 6fd31e8d25c..2ce9c75eae4 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -11,79 +11,213 @@ elimination](../../sql-reference/functions/index.md#common-subexpression-elimina function return different random values. Related content + - Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) :::note The random numbers are generated by non-cryptographic algorithms. ::: -## rand, rand32 +## rand -Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers. +Returns a random UInt32 number. + +### Syntax + +```sql +rand() +``` + +### Parameters + +None. + +### Output + +Returns a number of type UInt32. + +### Example + +```sql +SELECT rand() +``` + +```response +1569354847 +``` + +### Implementation details Uses a linear congruential generator. +## rand32 + +Returns a random 32-bit unsigned integer (UInt32) number. + +### Syntax + +```sql +rand32() +``` + +### Parameters + +None. + +### Output + +Returns a number of type UInt32, evenly distributed across the range of all possible UInt32 values. + +### Example + +```sql +SELECT rand32(); +``` + +```response +2754546224 +``` + +**Note:** The actual output will be a random number, not the specific number shown in the example. + ## rand64 -Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers. +Returns a random 64-bit unsigned integer (UInt64) number. -Uses a linear congruential generator. +### Syntax + +```sql +rand64() +``` + +### Parameters + +None. 
+ +### Implementation details + +The `rand64` function uses a linear congruential generator, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. + +For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + +### Output + +Returns a number of type UInt64, evenly distributed across the range of all possible UInt64 values. + +### Example + +```sql +SELECT rand64(); +``` + +```response +15030268859237645412 +``` + +**Note:** The actual output will be a random number, not the specific number shown in the example. ## randCanonical -Returns a random Float64 value, evenly distributed in interval [0, 1). +Returns a random floating-point number of type Float64, evenly distributed within the closed interval. + +### Syntax + +```sql +randCanonical() +``` + +### Parameters + +None. + +### Output + +Returns a Float64 value between 0 (inclusive) and 1 (exclusive). + +### Example + +```sql +SELECT randCanonical(); +``` + +```response +0.3452178901234567 +``` + +**Note:** The actual output will be a random decimal number between 0 and 1, not the specific number shown in the example. ## randConstant -Like `rand` but produces a constant column with a random value. +Generates a single constant column filled with a random value. Unlike `rand`, `randConstant` ensures the same random value appears in every row of the generated column, making it useful for scenarios requiring a consistent random seed across rows in a single query. -**Example** +### Syntax -``` sql -SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number) -FROM numbers(3) +```sql +randConstant([x]); ``` -Result: +### Parameters -``` result -┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐ -│ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │ -│ 956619638 │ 4238287282 │ 1104342490 │ 2740811946 │ 4229401477 │ 1924032898 │ -└────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ +- **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values. + +### Implementation details + +The actual output will be different for each query execution, even with the same optional expression. + +The optional parameter may not significantly change the generated value compared to using `randConstant` alone. + +### Output + +Returns a column of type UInt32 containing the same random value in each row. + +### Examples + +```sql +SELECT randConstant() AS random_value; +``` + +```response +| random_value | +|--------------| +| 1234567890 | +``` + +```sql +SELECT randConstant(10) AS random_value; +``` + +```response +| random_value | +|--------------| +| 9876543210 | ``` ## randUniform -Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). +Returns a random Float64 drawn uniformly from interval [`min`, `max`]. 
-**Syntax** +### Syntax -``` sql +```sql randUniform(min, max) ``` -**Arguments** +### Parameters - `min` - `Float64` - left boundary of the range, - `max` - `Float64` - right boundary of the range. -**Returned value** +### Output -- Random number. +A random number of type [Float64](/docs/en/sql-reference/data-types/float.md). -Type: [Float64](/docs/en/sql-reference/data-types/float.md). +### Example -**Example** - -``` sql +```sql SELECT randUniform(5.5, 10) FROM numbers(5) ``` -Result: - -``` result +```response ┌─randUniform(5.5, 10)─┐ │ 8.094978491443102 │ │ 7.3181248914450885 │ @@ -99,7 +233,7 @@ Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia **Syntax** -``` sql +```sql randNormal(mean, variance) ``` @@ -116,13 +250,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randNormal(10, 2) FROM numbers(5) ``` Result: -``` result +```result ┌──randNormal(10, 2)─┐ │ 13.389228911709653 │ │ 8.622949707401295 │ @@ -138,7 +272,7 @@ Returns a random Float64 drawn from a [log-normal distribution](https://en.wikip **Syntax** -``` sql +```sql randLogNormal(mean, variance) ``` @@ -155,13 +289,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randLogNormal(100, 5) FROM numbers(5) ``` Result: -``` result +```result ┌─randLogNormal(100, 5)─┐ │ 1.295699673937363e48 │ │ 9.719869109186684e39 │ @@ -177,7 +311,7 @@ Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedi **Syntax** -``` sql +```sql randBinomial(experiments, probability) ``` @@ -194,13 +328,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBinomial(100, 0.75)─┐ │ 74 │ │ 78 │ @@ -216,7 +350,7 @@ Returns a random UInt64 drawn from a [negative binomial distribution](https://en **Syntax** -``` sql +```sql randNegativeBinomial(experiments, probability) ``` @@ -233,13 +367,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randNegativeBinomial(100, .75) FROM numbers(5) ``` Result: -``` result +```result ┌─randNegativeBinomial(100, 0.75)─┐ │ 33 │ │ 32 │ @@ -255,7 +389,7 @@ Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia **Syntax** -``` sql +```sql randPoisson(n) ``` @@ -271,13 +405,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randPoisson(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randPoisson(10)─┐ │ 8 │ │ 8 │ @@ -293,7 +427,7 @@ Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikiped **Syntax** -``` sql +```sql randBernoulli(probability) ``` @@ -309,13 +443,13 @@ Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -``` sql +```sql SELECT randBernoulli(.75) FROM numbers(5) ``` Result: -``` result +```result ┌─randBernoulli(0.75)─┐ │ 1 │ │ 1 │ @@ -331,7 +465,7 @@ Returns a random Float64 drawn from a [exponential distribution](https://en.wiki **Syntax** -``` sql +```sql randExponential(lambda) ``` @@ -347,13 +481,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). 
**Example** -``` sql +```sql SELECT randExponential(1/10) FROM numbers(5) ``` Result: -``` result +```result ┌─randExponential(divide(1, 10))─┐ │ 44.71628934340778 │ │ 4.211013337903262 │ @@ -369,7 +503,7 @@ Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikip **Syntax** -``` sql +```sql randChiSquared(degree_of_freedom) ``` @@ -385,13 +519,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randChiSquared(10) FROM numbers(5) ``` Result: -``` result +```result ┌─randChiSquared(10)─┐ │ 10.015463656521543 │ │ 9.621799919882768 │ @@ -407,7 +541,7 @@ Returns a random Float64 drawn from a [Student's t-distribution](https://en.wiki **Syntax** -``` sql +```sql randStudentT(degree_of_freedom) ``` @@ -423,13 +557,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randStudentT(10) FROM numbers(5) ``` Result: -``` result +```result ┌─────randStudentT(10)─┐ │ 1.2217309938538725 │ │ 1.7941971681200541 │ @@ -445,7 +579,7 @@ Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/ **Syntax** -``` sql +```sql randFisherF(d1, d2) ``` @@ -462,13 +596,13 @@ Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -``` sql +```sql SELECT randFisherF(10, 3) FROM numbers(5) ``` Result: -``` result +```result ┌──randFisherF(10, 3)─┐ │ 7.286287504216609 │ │ 0.26590779413050386 │ @@ -484,7 +618,7 @@ Generates a string of the specified length filled with random bytes (including z **Syntax** -``` sql +```sql randomString(length) ``` @@ -502,13 +636,13 @@ Type: [String](../../sql-reference/data-types/string.md). Query: -``` sql +```sql SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; ``` Result: -``` text +```text Row 1: ────── str: 3 G : pT ?w тi k aV f6 @@ -526,7 +660,7 @@ Generates a binary string of the specified length filled with random bytes (incl **Syntax** -``` sql +```sql randomFixedString(length); ``` @@ -563,7 +697,7 @@ If you pass `length < 0`, the behavior of the function is undefined. **Syntax** -``` sql +```sql randomPrintableASCII(length) ``` @@ -579,11 +713,11 @@ Type: [String](../../sql-reference/data-types/string.md) **Example** -``` sql +```sql SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3 ``` -``` text +```text ┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐ │ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │ │ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │ @@ -597,7 +731,7 @@ Generates a random string of a specified length. Result string contains valid UT **Syntax** -``` sql +```sql randomStringUTF8(length); ``` @@ -635,11 +769,12 @@ Flips the bits of String or FixedString `s`, each with probability `prob`. **Syntax** -``` sql +```sql fuzzBits(s, prob) ``` **Arguments** + - `s` - `String` or `FixedString`, - `prob` - constant `Float32/64` between 0.0 and 1.0. @@ -649,14 +784,14 @@ Fuzzed string with same type as `s`. **Example** -``` sql +```sql SELECT fuzzBits(materialize('abacaba'), 0.1) FROM numbers(3) ``` Result: -``` result +```result ┌─fuzzBits(materialize('abacaba'), 0.1)─┐ │ abaaaja │ │ a*cjab+ │ From e98c30c161303d91c483e7928326f0d8efc1f9df Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Sun, 3 Mar 2024 14:38:59 -0700 Subject: [PATCH 097/985] Reorganizes rand docs page. 
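The page reorganized here documents, among other things, the contrast between per-row and per-query randomness (`rand` versus `randConstant`). A short illustrative query makes that contrast concrete; the actual numbers differ on every run, so only the shape of the result matters.

```sql
-- rand() is evaluated once per row, randConstant() once per query block.
SELECT
    number,
    rand()         AS per_row_value,
    randConstant() AS per_query_value
FROM numbers(3);

-- Expected shape: per_row_value differs across the three rows,
-- while per_query_value repeats the same UInt32 in all of them.
```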
--- .../functions/random-functions.md | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 2ce9c75eae4..b745d2833d3 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -36,19 +36,21 @@ None. Returns a number of type UInt32. +### Implementation details + +Uses a linear congruential generator. + ### Example ```sql -SELECT rand() +SELECT rand(); ``` ```response 1569354847 ``` -### Implementation details - -Uses a linear congruential generator. +**Note:** The actual output will be a random number, not the specific number shown in the example. ## rand32 @@ -94,16 +96,14 @@ rand64() None. -### Implementation details - -The `rand64` function uses a linear congruential generator, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. - -For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. - ### Output Returns a number of type UInt64, evenly distributed across the range of all possible UInt64 values. +### Implementation details + +The `rand64` function uses a linear congruential generator, which means that while it appears random, it's not truly random and can be predictable if the initial state is known. For scenarios where true randomness is crucial, consider using alternative methods like system-level calls or integrating with external libraries. + ### Example ```sql @@ -160,16 +160,14 @@ randConstant([x]); - **[x] (Optional):** An optional expression that influences the generated random value. Even if provided, the resulting value will still be constant within the same query execution. Different queries using the same expression will likely generate different constant values. -### Implementation details - -The actual output will be different for each query execution, even with the same optional expression. - -The optional parameter may not significantly change the generated value compared to using `randConstant` alone. - ### Output Returns a column of type UInt32 containing the same random value in each row. +### Implementation details + +The actual output will be different for each query execution, even with the same optional expression. The optional parameter may not significantly change the generated value compared to using `randConstant` alone. 
+ ### Examples ```sql From 671b0f678afcdcb354a85aa141920bff09e2bcb2 Mon Sep 17 00:00:00 2001 From: M1eyu2018 <857037797@qq.com> Date: Mon, 4 Mar 2024 10:12:27 +0800 Subject: [PATCH 098/985] Add positional read in libhdfs3 Signed-off-by: M1eyu2018 <857037797@qq.com> --- contrib/libhdfs3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index b9598e60167..0d04201c453 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 +Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf From c435d5894f48d37478454b1934d000fb967e2973 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 4 Mar 2024 14:23:59 +0800 Subject: [PATCH 099/985] remove wrong assertion n quantileGK --- .../AggregateFunctionGroupArray.cpp | 13 ++++++++----- .../AggregateFunctionQuantileGK.cpp | 12 ++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d72ddb42d9e..6af8b1018dd 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -182,11 +182,14 @@ public: if constexpr (Trait::sampler == Sampler::NONE) { - if (limit_num_elems && cur_elems.value.size() >= max_elems) + if constexpr (limit_num_elems) { - if constexpr (Trait::last) - cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value; - return; + if (cur_elems.value.size() >= max_elems) + { + if constexpr (Trait::last) + cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value; + return; + } } cur_elems.value.push_back(row_value, arena); @@ -236,7 +239,7 @@ public: void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const { - if (!limit_num_elems) + if constexpr (!limit_num_elems) { if (rhs_elems.value.size()) cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena); diff --git a/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp b/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp index 2e8ccb2e5e4..26737e43eef 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp @@ -144,7 +144,7 @@ public: count = other.count; compressed = other.compressed; - sampled.resize(other.sampled.size()); + sampled.resize_exact(other.sampled.size()); memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size()); return; } @@ -180,7 +180,7 @@ public: compress(); backup_sampled.clear(); - backup_sampled.reserve(sampled.size() + other.sampled.size()); + backup_sampled.reserve_exact(sampled.size() + other.sampled.size()); double merged_relative_error = std::max(relative_error, other.relative_error); size_t merged_count = count + other.count; Int64 additional_self_delta = static_cast(std::floor(2 * other.relative_error * other.count)); @@ -268,11 +268,7 @@ public: size_t sampled_len = 0; readBinaryLittleEndian(sampled_len, buf); - if (sampled_len > compress_threshold) - throw Exception( - ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold); - - sampled.resize(sampled_len); + sampled.resize_exact(sampled_len); for (size_t i = 0; i < sampled_len; ++i) { @@ -317,7 +313,7 @@ private: ::sort(head_sampled.begin(), head_sampled.end()); backup_sampled.clear(); - backup_sampled.reserve(sampled.size() + 
head_sampled.size()); + backup_sampled.reserve_exact(sampled.size() + head_sampled.size()); size_t sample_idx = 0; size_t ops_idx = 0; From 6fbfd42a0522fe4161d367e3d923f2480c1df21a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 4 Mar 2024 16:13:44 +0800 Subject: [PATCH 100/985] Update 02241_filesystem_cache_on_write_operations.reference --- .../02241_filesystem_cache_on_write_operations.reference | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference index 53566a18edc..186dcc1eeb2 100644 --- a/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference +++ b/tests/queries/0_stateless/02241_filesystem_cache_on_write_operations.reference @@ -205,13 +205,7 @@ INSERT INTO test_02241 SELECT number, toString(number) FROM numbers(300, 10000) SELECT count(), sum(size) FROM system.filesystem_cache 24 84045 SYSTEM START MERGES test_02241 -SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' -81715476 -SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' OPTIMIZE TABLE test_02241 FINAL -SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes' -81881872 -SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes' SELECT count(), sum(size) FROM system.filesystem_cache 32 167243 ALTER TABLE test_02241 UPDATE value = 'kek' WHERE key = 100 From 1768b4477f4ff5db238cd4cc553587b136ed015d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 3 Mar 2024 11:50:22 +0100 Subject: [PATCH 101/985] Revert "Merge pull request #60690 from ClickHouse/remove-bad-test-8" This reverts commit c77eb8b1427f98daf63f7087bbdc0530b07db825, reversing changes made to bae4783fe9bd25decc41383a1234b0e936284c21. 
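The test restored by this revert verifies that the temporary table created for HTTP external data is actually destroyed, by looking for memory-profiler samples of the drop path in `system.trace_log`. Stripped of the shell quoting used in the script below, the introspection pattern it relies on looks roughly like this; the `query_id` value is a placeholder.

```sql
-- Rough sketch of the trace_log check performed by the restored test.
WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym
SELECT count() > 0
FROM system.trace_log
WHERE query_id = '<query id of the external-table query>'
  AND trace_type = 'MemorySample'
  AND size < 0                      -- deallocations only
  AND sym LIKE '%StorageMemory::drop%'
SETTINGS allow_introspection_functions = 1;
```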
--- ..._external_tables_memory_tracking.reference | 16 ++++++ ...52_http_external_tables_memory_tracking.sh | 51 +++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference create mode 100755 tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference new file mode 100644 index 00000000000..1fc09c8d154 --- /dev/null +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.reference @@ -0,0 +1,16 @@ +Checking input_format_parallel_parsing=false& +1 +Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1 +1 +Checking input_format_parallel_parsing=false&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=false&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=true& +1 +Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1 +1 +Checking input_format_parallel_parsing=true&send_progress_in_http_headers=true +1 +Checking input_format_parallel_parsing=true&cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true +1 diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh new file mode 100755 index 00000000000..5f9eb460e44 --- /dev/null +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-cpu-aarch64, no-parallel +# TSan does not supports tracing. +# trace_log doesn't work on aarch64 + +# Regression for proper release of Context, +# via tracking memory of external tables. + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +tmp_file=$(mktemp "$CURDIR/clickhouse.XXXXXX.csv") +trap 'rm $tmp_file' EXIT + +$CLICKHOUSE_CLIENT -q "SELECT toString(number) FROM numbers(1e6) FORMAT TSV" > "$tmp_file" + +function run_and_check() +{ + local query_id + query_id="$(${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SELECT generateUUIDv4()')" + + echo "Checking $*" + + # Run query with external table (implicit StorageMemory user) + $CLICKHOUSE_CURL -sS -F "s=@$tmp_file;" "$CLICKHOUSE_URL&s_structure=key+Int&query=SELECT+count()+FROM+s&memory_profiler_sample_probability=1&max_untracked_memory=0&query_id=$query_id&$*" -o /dev/null + + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- <<<'SYSTEM FLUSH LOGS' + + # Check that temporary table had been destroyed. 
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" + WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym + SELECT count()>0 FROM system.trace_log + WHERE + sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' AND + query_id = '$query_id' + " +} + +for input_format_parallel_parsing in false true; do + query_args_variants=( + "" + "cancel_http_readonly_queries_on_client_close=1&readonly=1" + "send_progress_in_http_headers=true" + # nested progress callback + "cancel_http_readonly_queries_on_client_close=1&readonly=1&send_progress_in_http_headers=true" + ) + for query_args in "${query_args_variants[@]}"; do + run_and_check "input_format_parallel_parsing=$input_format_parallel_parsing&$query_args" + done +done From 048a042dc4963631a23358d3e454dcd8a9eaafa2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 3 Mar 2024 11:50:46 +0100 Subject: [PATCH 102/985] Make 02152_http_external_tables_memory_tracking less flaky Signed-off-by: Azat Khuzhin --- .../02152_http_external_tables_memory_tracking.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh index 5f9eb460e44..5494f7d59cb 100755 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-cpu-aarch64, no-parallel +# Tags: no-tsan, no-cpu-aarch64, no-parallel, no-debug # TSan does not supports tracing. # trace_log doesn't work on aarch64 @@ -30,10 +30,16 @@ function run_and_check() # Check that temporary table had been destroyed. 
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&allow_introspection_functions=1" --data-binary @- <<<" WITH arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS sym - SELECT count()>0 FROM system.trace_log + SELECT 1 FROM system.trace_log + PREWHERE + query_id = '$query_id' AND + trace_type = 'MemorySample' AND + /* only deallocations */ + size < 0 AND + event_date >= yesterday() WHERE - sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' AND - query_id = '$query_id' + sym LIKE '%DB::StorageMemory::drop%\n%TemporaryTableHolder::~TemporaryTableHolder%' + LIMIT 1 " } From a7db6688edb50f894457c414b207c25548bb18d3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 4 Mar 2024 18:24:24 +0800 Subject: [PATCH 103/985] Update ObjectStorageFactory.cpp --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 47c02f87b23..a0578ac4454 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -246,12 +246,11 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - return std::make_unique( + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); - }); } #endif From 81185815a48b36d344bda623dd175c30e9b87ba3 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Mon, 4 Mar 2024 14:09:31 +0300 Subject: [PATCH 104/985] Update settings_changes_history --- src/Core/SettingsChangesHistory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4805df46d9b..b8793f437d8 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,9 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"24.3", { + {"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."}, + }}, {"24.2", { {"output_format_values_escape_quote_with_quote", false, false, "If true escape ' with '', otherwise quoted with \\'"}, {"input_format_try_infer_exponent_floats", true, false, "Don't infer floats in exponential notation by default"}, From fbdc5e305365e9d93b86ed47144ffb13c1ce70c1 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Mon, 4 Mar 2024 17:16:51 +0300 Subject: [PATCH 105/985] Ignore flaky fail of system unfreeze --- .../0_stateless/03000_traverse_shadow_system_data_paths.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh index a22cb200f9a..2905d7801ca 100755 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh @@ -26,7 +26,7 @@ ${CLICKHOUSE_CLIENT} 
--query " FROM system.remote_data_paths WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' SETTINGS traverse_shadow_remote_data_paths=1;" -${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" >/dev/null +${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null || true ${CLICKHOUSE_CLIENT} --query " SELECT count() == 0 FROM system.remote_data_paths From aa43885ac81924a73e9a151a550e7c1af43d23e2 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 5 Mar 2024 10:57:25 +0800 Subject: [PATCH 106/985] [improve] add check the remaining disk size before copying --- src/Storages/MergeTree/MergeTreeData.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 849ceb1b66d..d8680958c21 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7160,14 +7160,18 @@ std::pair MergeTreeData::cloneAn { try { + auto reservation_space = src_part_storage->reserve(src_part->getBytesOnDisk()); + if (!reservation_space) { + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); + } dst_part_storage = src_part_storage->clonePart(this->getRelativeDataPath(), tmp_dst_part_name, disk, read_settings, write_settings, {}, {}); copy_successful = true; break; } - catch (...) + catch (Exception & e) { - LOG_TRACE(&Poco::Logger::get("MergeTreeData"), "Clone part on disk {} fail", disk->getName()); + LOG_TRACE(&Poco::Logger::get("MergeTreeData"), "Clone part on disk {} fail: {}", disk->getName(), e.what()); } } if (!copy_successful) @@ -7291,6 +7295,9 @@ std::pair MergeTreeData::cloneAn { try { + auto reservation_space = src_part_storage->reserve(src_part->getBytesOnDisk()); + if (!reservation_space) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); dst_part_storage = src_part_storage->clonePart(this->getRelativeDataPath(), tmp_dst_part_name, disk, read_settings, write_settings, {}, {}); copy_successful = true; From b0050566e22d10ca621a33c1b4fedb987ad2620c Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:14:56 +0800 Subject: [PATCH 107/985] Fix style check --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index fe8d63b053d..298000ac015 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -21,7 +21,7 @@ namespace { return settings.remote_fs_cache && settings.enable_filesystem_cache; } - + bool withPageCache(const ReadSettings & settings, bool with_file_cache) { return settings.page_cache && !with_file_cache && settings.use_page_cache_for_disks_without_file_cache; From 2ee846b393d79f3f0d9710ddf910552ba1e040cd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 5 Mar 2024 14:07:56 +0800 Subject: [PATCH 108/985] Fix build --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 298000ac015..f72e6634465 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -17,7 +17,7 @@ using namespace DB; namespace { - bool 
withCache(const ReadSettings & settings) + bool withFileCache(const ReadSettings & settings) { return settings.remote_fs_cache && settings.enable_filesystem_cache; } From 758a75c1b46fa27a88e3dcf6e70a18dcf41d62ef Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Tue, 5 Mar 2024 09:53:30 +0300 Subject: [PATCH 109/985] Fix flaky test. Fix clang-tidy warning --- src/Disks/IDisk.h | 7 +++---- src/Disks/ObjectStorages/DiskObjectStorage.h | 2 +- src/Storages/System/StorageSystemRemoteDataPaths.cpp | 4 ++-- .../0_stateless/03000_traverse_shadow_system_data_paths.sh | 7 +------ 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 62b02938d1a..fcc92db7b96 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -321,11 +321,10 @@ public: }; virtual void getRemotePathsRecursive( - const String &, - std::vector &, - const std::function & /* skip_predicate */ = {}) + const String &, std::vector &, const std::function & /* skip_predicate */) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "Method `getRemotePathsRecursive() not implemented for disk: {}`", getDataSourceDescription().toString()); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index d7af656bea3..9f11c0ed02e 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -51,7 +51,7 @@ public: void getRemotePathsRecursive( const String & local_path, std::vector & paths_map, - const std::function & skip_predicate = {}) override; + const std::function & skip_predicate) override; const std::string & getCacheName() const override { return object_storage->getCacheName(); } diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 708c1369965..a6263f18492 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -58,8 +58,8 @@ Pipe StorageSystemRemoteDataPaths::read( if (disk->isRemote()) { std::vector remote_paths_by_local_path; - disk->getRemotePathsRecursive("store", remote_paths_by_local_path); - disk->getRemotePathsRecursive("data", remote_paths_by_local_path); + disk->getRemotePathsRecursive("store", remote_paths_by_local_path, /* skip_predicate = */ {}); + disk->getRemotePathsRecursive("data", remote_paths_by_local_path, /* skip_predicate = */ {}); if (context->getSettingsRef().traverse_shadow_remote_data_paths) disk->getRemotePathsRecursive( "shadow", diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh index 2905d7801ca..a1d4b9bba46 100755 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh @@ -26,9 +26,4 @@ ${CLICKHOUSE_CLIENT} --query " FROM system.remote_data_paths WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' SETTINGS traverse_shadow_remote_data_paths=1;" -${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null || true -${CLICKHOUSE_CLIENT} --query " - SELECT count() == 0 - FROM system.remote_data_paths - WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' - SETTINGS traverse_shadow_remote_data_paths=1;" +${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null From 
df80c8c9f6ee0939cc6e6e05f3e951511a20f476 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Tue, 5 Mar 2024 10:43:48 +0300 Subject: [PATCH 110/985] Update test reference --- .../03000_traverse_shadow_system_data_paths.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference index e8183f05f5d..6ed281c757a 100644 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.reference @@ -1,3 +1,2 @@ 1 1 -1 From e789d15948eaec3eaa9a8604e24d2f6ed7b60db5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Mar 2024 16:06:25 +0800 Subject: [PATCH 111/985] optimize insertmanyfrom of nullable(number) or nullable(string) --- src/Columns/ColumnDecimal.h | 7 +++++++ src/Columns/ColumnNullable.cpp | 8 ++++++++ src/Columns/ColumnNullable.h | 1 + src/Columns/ColumnString.cpp | 21 +++++++++++++++++++++ src/Columns/ColumnString.h | 2 ++ 5 files changed, 39 insertions(+) diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 7ca01a8342c..e0ea26744dc 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -56,6 +56,13 @@ public: void shrinkToFit() override { data.shrink_to_fit(); } void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast(src).getData()[n]); } + + void insertManyFrom(const IColumn & src, size_t position, size_t length) override + { + ValueType v = assert_cast(src).getData()[position]; + data.resize_fill(data.size() + length, v); + } + void insertData(const char * src, size_t /*length*/) override; void insertDefault() override { data.push_back(T()); } void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); } diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 1d11827ac97..fa5fdfb8c21 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -231,6 +231,14 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n) getNullMapData().push_back(src_concrete.getNullMapData()[n]); } + +void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnNullable & src_concrete = assert_cast(src); + getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length); + getNullMapColumn().insertManyFrom(src_concrete.getNullMapColumn(), position, length); +} + void ColumnNullable::insertFromNotNullable(const IColumn & src, size_t n) { getNestedColumn().insertFrom(src, n); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index b4aef8e08fa..ef4bf4fa41b 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -69,6 +69,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertFromNotNullable(const IColumn & src, size_t n); void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index b9128372cea..f3c7ac1bf0c 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -38,6 +38,27 @@ ColumnString::ColumnString(const ColumnString & src) last_offset, chars.size()); } +void 
ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnString & src_concrete = assert_cast(src); + const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]]; + const size_t src_buf_size + = src_concrete.offsets[position] - src_concrete.offsets[position - 1]; /// -1th index is Ok, see PaddedPODArray. + + const size_t old_size = chars.size(); + const size_t new_size = old_size + src_buf_size * length; + chars.resize(new_size); + + const size_t old_rows = offsets.size(); + offsets.resize(old_rows + length); + + for (size_t current_offset = old_size; current_offset < new_size; current_offset += src_buf_size) + memcpySmallAllowReadWriteOverflow15(&chars[current_offset], src_buf, src_buf_size); + + for (size_t i = 0, current_offset = old_size + src_buf_size; i < length; ++i, current_offset += src_buf_size) + offsets[old_rows + i] = current_offset; +} + MutableColumnPtr ColumnString::cloneResized(size_t to_size) const { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 04aa1849187..2d1d69ced73 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -160,6 +160,8 @@ public: } } + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertData(const char * pos, size_t length) override { const size_t old_size = chars.size(); From 47ad21dd257ff1a5751d191dfd311a7950a93111 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:17:04 +0100 Subject: [PATCH 112/985] Remove extra empty line --- .../03002_map_array_functions_with_low_cardinality.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql index 8240a8f93f5..8820a433da8 100644 --- a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql @@ -1,2 +1 @@ SELECT mapContainsKeyLike(map('aa', toLowCardinality(1), 'bb', toLowCardinality(2)), toLowCardinality('a%')); - From 580fd4ba080df6e29c59b785b1fca0eea76e649c Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Tue, 5 Mar 2024 10:43:48 +0300 Subject: [PATCH 113/985] Update test reference --- .../0_stateless/03000_traverse_shadow_system_data_paths.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh index a1d4b9bba46..98575540923 100755 --- a/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh +++ b/tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh @@ -26,4 +26,4 @@ ${CLICKHOUSE_CLIENT} --query " FROM system.remote_data_paths WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%' SETTINGS traverse_shadow_remote_data_paths=1;" -${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null +${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null || true From a109952960acac12790cffde030062ec60208994 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 5 Mar 2024 22:08:36 +0800 Subject: [PATCH 114/985] dev columnstring --- src/Columns/ColumnArray.cpp | 83 +++++++++++++++++++++++++++++++ src/Columns/ColumnArray.h | 9 ++++ src/Columns/ColumnConst.h | 2 + 
src/Columns/ColumnFixedString.cpp | 14 ++++++ src/Columns/ColumnFixedString.h | 2 + 5 files changed, 110 insertions(+) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 7b268b80116..b620da81ae8 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -347,6 +347,89 @@ void ColumnArray::insertFrom(const IColumn & src_, size_t n) getOffsets().push_back(getOffsets().back() + size); } +template +void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, size_t length) +{ + using ColVecType = ColumnVectorOrDecimal; + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + + const typename ColVecType::Container & src_data = typeid_cast(src.getData()).getData(); + typename ColVecType::Container & data_ref = typeid_cast(getData()).getData(); + size_t old_size = data_ref.size(); + size_t new_size = old_size + src_size * length; + data_ref.resize(new_size); + for (size_t i = 0, offset = old_size; i < length; ++i, offset += src_size) + memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); +} + +void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, size_t length) +{ + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + + const auto & src_string = typeid_cast(src.getData()); + const auto & src_chars = src_string.getChars(); + const auto & src_string_offsets = src_string.getOffsets(); + auto & dst_string = typeid_cast(getData()); + auto & dst_chars = dst_string.getChars(); + auto & dst_string_offsets = dst_string.getOffsets(); + + /// Each row may have multiple strings, copy them to dst_chars and update dst_offsets + size_t old_size = dst_string_offsets.size(); + size_t new_size = old_size + src_size * length; + dst_string_offsets.resize(new_size); + size_t dst_string_offset = dst_chars.size(); + for (size_t i = 0; i < length; ++i) + { + for (size_t j = 0; j < src_size; ++j) + { + size_t nested_offset = src_string_offsets[src_offset + j - 1]; + size_t nested_length = src_string_offsets[src_offset + j] - nested_offset; + + dst_string_offset += nested_length; + dst_string_offsets[old_size + i * src_size + j] = dst_string_offset; + } + } + + size_t chars_to_copy = src_string_offsets[src_offset + src_size - 1] - src_string_offsets[src_offset - 1]; + dst_chars.resize(dst_chars.size() + chars_to_copy * length); + for (size_t dst_offset = old_size; dst_offset < new_size; dst_offset += src_size) + memcpy(&dst_chars[dst_string_offsets[dst_offset - 1]], &src_chars[src_string_offsets[src_offset - 1]], chars_to_copy); +} + +void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, size_t length) +{ + +} +void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t position, size_t length) +{ + +} +void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length) +{ + size_t src_size = src.sizeAt(position); + size_t src_offset = src.offsetAt(position); + const auto & src_data = src.getData(); + size_t new_size = data->size() + src_size * length; + data->reserve(new_size); + for (size_t i = 0; i < length; ++i) + data->insertRangeFrom(src_data, src_offset, src_size); +} + +void ColumnArray::insertManyFrom(const IColumn & src_, size_t position, size_t length) +{ + /// First fill offsets + const ColumnArray & src = assert_cast(src_); + size_t src_size = src.sizeAt(position); + auto & offsets_ref = getOffsets(); + size_t old_rows = offsets_ref.size(); + size_t new_rows = 
old_rows + length; + size_t old_size = offsets_ref.back(); + offsets_ref.resize(new_rows); + for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) + offsets_ref[old_rows + i] = offset; +} void ColumnArray::insertDefault() { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 230d8830265..73d632a38b9 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -88,6 +88,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; @@ -213,6 +214,14 @@ private: ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; + /// Specializations for insertManyFrom + template + void insertManyFromNumber(const ColumnArray & src, size_t position, size_t length); + void insertManyFromString(const ColumnArray & src, size_t position, size_t length); + void insertManyFromTuple(const ColumnArray & src, size_t position, size_t length); + void insertManyFromNullable(const ColumnArray & src, size_t position, size_t length); + void insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length); + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const; }; diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 990b7189fa3..4a3d40ca0d2 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -150,6 +150,8 @@ public: ++s; } + void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; } + void insertDefault() override { ++s; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index e460c84d696..b55f68d4687 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -85,6 +85,20 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index) memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n); } +void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnFixedString & src_concrete = assert_cast(src); + if (n != src_concrete.getN()) + throw Exception(ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH, "Size of FixedString doesn't match"); + + const size_t old_size = chars.size(); + const size_t new_size = old_size + n * length; + chars.resize(new_size); + + for (size_t offset = old_size; offset < new_size; offset += n) + memcpySmallAllowReadWriteOverflow15(&chars[offset], &src_concrete.chars[n * position], n); +} + void ColumnFixedString::insertData(const char * pos, size_t length) { if (length > n) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index f40e1356b27..56d42e8b34e 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -100,6 +100,8 @@ public: void insertFrom(const IColumn & src_, size_t index) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; + void insertData(const char * pos, size_t length) override; void insertDefault() override From bfb703b579fa192dc58e51ea842067e7e379e949 Mon Sep 17 00:00:00 2001 From: 
Blargian Date: Tue, 5 Mar 2024 15:38:42 +0100 Subject: [PATCH 115/985] Add mortonEncode and mortonDecode to documentation --- .../functions/encoding-functions.md | 203 ++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 618dd3f4b4f..7fd77ce3a6a 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -433,3 +433,206 @@ Result: │ [0,1,2,3,4,5,6,7] │ └───────────────────┘ ``` + +## mortonEncode + +Calculates the Morton encoding (ZCurve) for a list of unsigned integers. + +The function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts up to 8 unsigned integers as arguments and produces a UInt64 code. + +**Syntax** + +```sql +mortonEncode(args) +``` + +**Parameters** + +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonEncode(1, 2, 3); +``` + +```response +53 +``` + +### Expanded mode + +Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. + +Each number in the mask configures the amount of range expansion: +1 - no expansion +2 - 2x expansion +3 - 3x expansion +... +Up to 8x expansion. + +**Syntax** + +```sql +mortonEncode(range_mask, args) +``` + +**Parameters** +- `range_mask`: 1-8. +- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type. + +Note: when using columns for `args` the provided `range_mask` tuple should still be a constant. + +**Returned value** + +- A UInt64 code + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + + +**Example** + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). + +Query: + +```sql +SELECT mortonEncode((1,2), 1024, 16); +``` + +```response +1572864 +``` + +Note: tuple size must be equal to the number of the other arguments. + +**Example** + +Morton encoding for one argument is always the argument itself: + +Query: + +```sql +SELECT mortonEncode(1); +``` + +```response +1 +``` + +**Example** + +It is also possible to expand one argument too: + +Query: + +```sql +SELECT mortonEncode(tuple(2), 128); +``` + +```response +32768 +``` + +**implementation details** + +Please note that you can fit only so much bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. + +## mortonDecode + +Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple. + +As with the `mortonEncode` function, this function has two modes of operation: +- Simple +- Expanded + +### Simple mode + +Accepts a resulting tuple size as the first argument and the code as the second argument. + +**Syntax** + +```sql +mortonDecode(tuple_size, code) +``` + +**Parameters** +- `tuple_size`: integer value no more than 8. 
+- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code. + +**Returned value** + +- [tuple](../../sql-reference/data-types/tuple.md) of the specified size. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md) + +**Example** + +Query: + +```sql +SELECT mortonDecode(3, 53); +``` + +```response +["1","2","3"] +``` + +### Expanded mode + +Accepts a range mask (tuple) as a first argument and the code as the second argument. +Each number in the mask configures the amount of range shrink +1 - no shrink +2 - 2x shrink +3 - 3x shrink +... +Up to 8x shrink. + +Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) +For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). +As with the encode function, this is limited to 8 numbers at most. + +**Example** + +Query: + +```sql +SELECT mortonDecode(1, 1); +``` + +```response +["1"] +``` + +**Example** + +It is also possible to shrink one argument: + +Query: + +```sql +SELECT mortonDecode(tuple(2), 32768); +``` + +```response +["128"] +``` + + + + From 7930a26df136c8a2e10f839ea4738a338dbb6c9e Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 5 Mar 2024 15:47:45 +0100 Subject: [PATCH 116/985] Fix formatting of compression/expansion levels --- .../functions/encoding-functions.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 7fd77ce3a6a..0cb459b8e07 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -478,12 +478,12 @@ SELECT mortonEncode(1, 2, 3); Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments. -Each number in the mask configures the amount of range expansion: -1 - no expansion -2 - 2x expansion -3 - 3x expansion -... -Up to 8x expansion. +Each number in the mask configures the amount of range expansion:
+1 - no expansion<br/>
+2 - 2x expansion<br/>
+3 - 3x expansion<br/>
+...<br/>
+Up to 8x expansion.<br/>
**Syntax** @@ -596,12 +596,12 @@ SELECT mortonDecode(3, 53); ### Expanded mode Accepts a range mask (tuple) as a first argument and the code as the second argument. -Each number in the mask configures the amount of range shrink -1 - no shrink -2 - 2x shrink -3 - 3x shrink -... -Up to 8x shrink. +Each number in the mask configures the amount of range shrink:
+1 - no shrink<br/>
+2 - 2x shrink<br/>
+3 - 3x shrink<br/>
+...<br/>
+Up to 8x shrink.<br/>
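+
+For instance, decoding the expanded-mode encode example from above (`mortonEncode((1,2), 1024, 16)` returned 1572864) with the same range mask should give back the original values:
+
+```sql
+SELECT mortonDecode((1,2), 1572864);
+```
+
+```response
+["1024","16"]
+```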
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality) For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF). From 45509607ad139c099c5a4d5fea07ac34149dcf2d Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 5 Mar 2024 15:50:30 +0100 Subject: [PATCH 117/985] Fix spelling mistake --- docs/en/sql-reference/functions/encoding-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 0cb459b8e07..28431c84add 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -551,7 +551,7 @@ SELECT mortonEncode(tuple(2), 128); **implementation details** -Please note that you can fit only so much bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. +Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. ## mortonDecode From aa6b70e5f2187be71b6bce835ecff0aa0c0bfca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 5 Mar 2024 16:55:08 +0000 Subject: [PATCH 118/985] Add documentation to `simpleJSON` functions --- .../sql-reference/functions/json-functions.md | 392 +++++++++++++++--- 1 file changed, 342 insertions(+), 50 deletions(-) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 2c837ff4a42..246cb8972fb 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,80 +5,372 @@ sidebar_label: JSON --- There are two sets of functions to parse JSON. - - `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. + - `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - `JSONExtract*` is made to parse normal JSON. -# visitParam functions +# simpleJSON/visitParam functions ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON does not have space characters outside of string literals. -## visitParamHas(params, name) +## simpleJSONHas -Checks whether there is a field with the `name` name. 
+Checks whether there is a field named `field_name`. The result is `UInt8`. -Alias: `simpleJSONHas`. +**Syntax** -## visitParamExtractUInt(params, name) - -Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0. - -Alias: `simpleJSONExtractUInt`. - -## visitParamExtractInt(params, name) - -The same as for Int64. - -Alias: `simpleJSONExtractInt`. - -## visitParamExtractFloat(params, name) - -The same as for Float64. - -Alias: `simpleJSONExtractFloat`. - -## visitParamExtractBool(params, name) - -Parses a true/false value. The result is UInt8. - -Alias: `simpleJSONExtractBool`. - -## visitParamExtractRaw(params, name) - -Returns the value of a field, including separators. - -Alias: `simpleJSONExtractRaw`. - -Examples: - -``` sql -visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'; -visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'; +```sql +simpleJSONHas(json, field_name) ``` -## visitParamExtractString(params, name) +**Parameters** -Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string. +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) -Alias: `simpleJSONExtractString`. +**Returned value** -Examples: +It returns `1` if the field exists, `0` otherwise. -``` sql -visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'; -visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'; -visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''; -visitParamExtractString('{"abc":"hello}', 'abc') = ''; +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons; ``` +```response +1 +0 +``` +## simpleJSONExtractUInt + +Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractUInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +4 +0 +3 +5 +``` + +## simpleJSONExtractInt + +Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. 
+ +**Syntax** + +```sql +simpleJSONExtractInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4 +0 +-3 +5 +``` + +## simpleJSONExtractFloat + +Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractFloat(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4000 +0 +-3.4 +5 +``` + +## simpleJSONExtractBool + +Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`. + +**Syntax** + +```sql +simpleJSONExtractBool(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases: + - If the field doesn't exists. + - If the field contains `true` as a string, e.g.: `{"field":"true"}`. + - If the field contains `1` as a numerical value. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":false,"bar":true}'); +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; +SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +1 +0 +0 +``` + +## simpleJSONExtractRaw + +Returns the value of the field named `field_name` as a `String`, including separators. + +**Syntax** + +```sql +simpleJSONExtractRaw(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. 
[String literal](../syntax#string) + +**Returned value** + +It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an emtpy `String` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json; +``` + +```response + +"-4e3" +-3.4 +5 +{"def":[1,2,3]} +``` + +## simpleJSONExtractString + +Parses `String` in double quotes from the value of the field named `field_name`. + +**Syntax** + +```sql +simpleJSONExtractString(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist. + +**Implementation details** + There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8). +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263a"}'); +INSERT INTO jsons VALUES ('{"foo":"hello}'); + +SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +\n\0 + +☺ + +``` + +## visitParamHas + +This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas). + +## visitParamExtractUInt + +This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint). + +## visitParamExtractInt + +This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint). + +## visitParamExtractFloat + +This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat). + +## visitParamExtractBool + +This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool). + +## visitParamExtractRaw + +This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw). + +## visitParamExtractString + +This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring). + # JSONExtract functions The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. From 981c507d8007a4f7761a83a2ecfa0956a364317d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 5 Mar 2024 17:01:54 +0000 Subject: [PATCH 119/985] Add example to `sin`. 
--- docs/en/sql-reference/functions/math-functions.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index b27668caf0c..fc659891b5c 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -299,6 +299,18 @@ sin(x) Type: [Float*](../../sql-reference/data-types/float.md). +**Example** + +Query: + +```sql +SELECT sin(1.23); +``` + +```response +0.9424888019316975 +``` + ## cos Returns the cosine of the argument. From 57670a69be7aee37141aad13c3c9509ea2a40162 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 04:15:57 +0100 Subject: [PATCH 120/985] Add mortonEncode, mortonDecode and related to spelling exceptions --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f61448b2f35..6257b2fcd95 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -260,6 +260,7 @@ ExactEdgeLengthRads ExecutablePool ExtType ExternalDistributed +FFFFFFFF FFFD FIPS FOSDEM @@ -546,6 +547,8 @@ MinIO MinMax MindsDB Mongodb +mortonDecode +mortonEncode MsgPack MultiPolygon Multiline @@ -2741,6 +2744,7 @@ xz yaml yandex youtube +ZCurve zLib zLinux zabbix From 53c9d4513c4b93ed79df305bb5c36c0cfb43ef79 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 12:16:17 +0800 Subject: [PATCH 121/985] finish dev column array --- src/Columns/ColumnArray.cpp | 132 +++++++++++++++++++++++++++++++++--- src/Columns/ColumnArray.h | 3 + 2 files changed, 125 insertions(+), 10 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index b620da81ae8..aa0d5aa3e50 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -31,6 +31,7 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int LOGICAL_ERROR; extern const int TOO_LARGE_ARRAY_SIZE; + extern const int ILLEGAL_COLUMN; } /** Obtaining array as Field can be slow for large arrays and consume vast amount of memory. @@ -363,6 +364,19 @@ void ColumnArray::insertManyFromNumber(const ColumnArray & src, size_t position, memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); } +void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t position, size_t length) +{ + const ColumnArray * src_array = typeid_cast(&src.getDataColumn()); + if (!src_array) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert from const column of type {} to column of type {}", + src.getDataColumn().getName(), + getName()); + + insertManyFromImpl(*src_array, 0, length, true); +} + void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, size_t length) { size_t src_size = src.sizeAt(position); @@ -400,12 +414,53 @@ void ColumnArray::insertManyFromString(const ColumnArray & src, size_t position, void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, size_t length) { + ColumnTuple & tuple = assert_cast(getData()); + const ColumnTuple & src_tuple = assert_cast(src.getData()); + /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. 
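+    /// Note: each temporary array below shares this column's offsets, and the nested
+    /// insertManyFromImpl calls pass update_offsets = false, so the offsets are written only
+    /// once by the outer call while only the per-component data is appended here.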
+ size_t tuple_size = tuple.tupleSize(); + size_t src_tuple_size = src_tuple.tupleSize(); + if (tuple_size == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple"); + if (tuple_size != src_tuple_size) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); + + Columns temporary_arrays(tuple_size); + Columns src_temporary_arrays(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + { + temporary_arrays[i] = ColumnArray::create(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable()); + src_temporary_arrays[i] = ColumnArray::create(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + assert_cast(*temporary_arrays[i]) + .insertManyFromImpl(assert_cast(*src_temporary_arrays[i]), position, length, false); + } + + Columns tuple_columns(tuple_size); + for (size_t i = 0; i < tuple_size; ++i) + tuple_columns[i] = assert_cast(*temporary_arrays[i]).getDataPtr(); + + getDataPtr() = ColumnTuple::create(std::move(tuple_columns)); } + void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t position, size_t length) { + ColumnNullable & nullable = assert_cast(getData()); + const ColumnNullable & src_nullable = assert_cast(src.getData()); + /// Process nested column without updating array offsets + auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto src_array_of_nested = ColumnArray(src_nullable.getNestedColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_nested.insertManyFromImpl(src_array_of_nested, position, length, false); + + /// Process null map column without updating array offsets + auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto src_array_of_null_map = ColumnArray(src_nullable.getNullMapColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_null_map.insertManyFromImpl(src_array_of_null_map, position, length, false); + + /// Update array data + getDataPtr() = ColumnNullable::create(array_of_nested.getDataPtr(), array_of_null_map.getDataPtr()); } + void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position, size_t length) { size_t src_size = src.sizeAt(position); @@ -419,16 +474,73 @@ void ColumnArray::insertManyFromGeneric(const ColumnArray & src, size_t position void ColumnArray::insertManyFrom(const IColumn & src_, size_t position, size_t length) { - /// First fill offsets - const ColumnArray & src = assert_cast(src_); - size_t src_size = src.sizeAt(position); - auto & offsets_ref = getOffsets(); - size_t old_rows = offsets_ref.size(); - size_t new_rows = old_rows + length; - size_t old_size = offsets_ref.back(); - offsets_ref.resize(new_rows); - for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) - offsets_ref[old_rows + i] = offset; + const ColumnConst * src_const = typeid_cast(&src_); + if (src_const) + return insertManyFromConst(*src_const, position, length); + + const ColumnArray * src_array = typeid_cast(&src_); + if (!src_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert from column of type {} to column of type {}", src_.getName(), getName()); + + return insertManyFromImpl(*src_array, position, length, true); +} + +void ColumnArray::insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets) +{ + /// First fill offsets if needed + if (update_offsets) + 
{ + size_t src_size = src.sizeAt(position); + auto & offsets_ref = getOffsets(); + size_t old_rows = offsets_ref.size(); + size_t new_rows = old_rows + length; + size_t old_size = offsets_ref.back(); + offsets_ref.resize(new_rows); + for (size_t i = 0, offset = old_size + src_size; i < length; ++i, offset += src_size) + offsets_ref[old_rows + i] = offset; + } + + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast *>(data.get())) + return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromNullable(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromTuple(src, position, length); + return insertManyFromGeneric(src, position, length); } void ColumnArray::insertDefault() diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 73d632a38b9..765f86ec552 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -215,6 +215,9 @@ private: ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; /// Specializations for insertManyFrom + void insertManyFromConst(const ColumnConst & src, size_t position, size_t length); + void insertManyFromImpl(const ColumnArray & src, size_t position, size_t length, bool update_offsets = true); + template void insertManyFromNumber(const ColumnArray & src, size_t position, size_t length); void insertManyFromString(const ColumnArray & src, size_t position, size_t length); From 3bf3c7cc708d1a564896d649a1a804b868f89d8d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 12:32:23 +0800 Subject: [PATCH 122/985] finish column map and tuple --- src/Columns/ColumnArray.cpp | 2 +- src/Columns/ColumnMap.cpp | 5 +++++ src/Columns/ColumnMap.h | 1 + src/Columns/ColumnTuple.cpp | 12 ++++++++++++ src/Columns/ColumnTuple.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index aa0d5aa3e50..5b0df8e9b6b 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -364,7 +364,7 @@ void ColumnArray::insertManyFromNumber(const ColumnArray & src, 
size_t position, memcpy(&data_ref[offset], &src_data[src_offset], src_size * sizeof(T)); } -void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t position, size_t length) +void ColumnArray::insertManyFromConst(const ColumnConst & src, size_t /*position*/, size_t length) { const ColumnArray * src_array = typeid_cast(&src.getDataColumn()); if (!src_array) diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 995f3103484..57e8ba685b4 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -158,6 +158,11 @@ void ColumnMap::insertFrom(const IColumn & src, size_t n) nested->insertFrom(assert_cast(src).getNestedColumn(), n); } +void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + assert_cast(*nested).insertManyFrom(assert_cast(src).getNestedColumn(), position, length); +} + void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length) { nested->insertRangeFrom( diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index 17cd86a3788..60aa69e7bf6 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -67,6 +67,7 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; void updateHashFast(SipHash & hash) const override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; void expand(const Filter & mask, bool inverted) override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 17cc58d92f5..062bdadf9d2 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -185,6 +185,18 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) columns[i]->insertFrom(*src.columns[i], n); } +void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length) +{ + const ColumnTuple & src_tuple = assert_cast(src); + + const size_t tuple_size = columns.size(); + if (src_tuple.columns.size() != tuple_size) + throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple"); + + for (size_t i = 0; i < tuple_size; ++i) + columns[i]->insertManyFrom(*src_tuple.columns[i], position, length); +} + void ColumnTuple::insertDefault() { for (auto & column : columns) diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 610416b8b11..5b626155754 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -60,6 +60,7 @@ public: void insert(const Field & x) override; bool tryInsert(const Field & x) override; void insertFrom(const IColumn & src_, size_t n) override; + void insertManyFrom(const IColumn & src, size_t position, size_t length) override; void insertDefault() override; void popBack(size_t n) override; StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; From 68a3ca37c40db7f9b928d9f20bde6912ba6bd7da Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 06:42:01 +0100 Subject: [PATCH 123/985] Add examples using columns --- .../functions/encoding-functions.md | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index 28431c84add..c81b3e35317 100644 
--- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -515,6 +515,8 @@ Query: SELECT mortonEncode((1,2), 1024, 16); ``` +Result: + ```response 1572864 ``` @@ -531,6 +533,8 @@ Query: SELECT mortonEncode(1); ``` +Result: + ```response 1 ``` @@ -545,10 +549,49 @@ Query: SELECT mortonEncode(tuple(2), 128); ``` +Result: + ```response 32768 ``` +**Example** + +You can also use column names in the function. + +Query: + +First create the table and insert some data. + +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonEncode` + +Query: + +```sql +SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers; +``` + +Result: + +```response +2155374165 +``` + **implementation details** Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero. @@ -589,6 +632,8 @@ Query: SELECT mortonDecode(3, 53); ``` +Result: + ```response ["1","2","3"] ``` @@ -615,6 +660,8 @@ Query: SELECT mortonDecode(1, 1); ``` +Result: + ```response ["1"] ``` @@ -629,10 +676,48 @@ Query: SELECT mortonDecode(tuple(2), 32768); ``` +Result: + ```response ["128"] ``` +**Example** + +You can also use column names in the function. + +First create the table and insert some data. 
+ +Query: +```sql +create table morton_numbers( + n1 UInt32, + n2 UInt32, + n3 UInt16, + n4 UInt16, + n5 UInt8, + n6 UInt8, + n7 UInt8, + n8 UInt8 +) +Engine=MergeTree() +ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +insert into morton_numbers (*) values(1,2,3,4,5,6,7,8); +``` +Use column names instead of constants as function arguments to `mortonDecode` + +Query: + +```sql +select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers; +``` + +Result: + +```response +1 2 3 4 5 6 7 8 +``` + From 3005bff23100539dbb71f9623dc3aed9c34a87f6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 14:43:33 +0800 Subject: [PATCH 124/985] fix building --- src/Columns/ColumnArray.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 5b0df8e9b6b..389b3e97820 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -425,7 +425,7 @@ void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, if (tuple_size != src_tuple_size) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); - Columns temporary_arrays(tuple_size); + MutableColumns temporary_arrays(tuple_size); Columns src_temporary_arrays(tuple_size); for (size_t i = 0; i < tuple_size; ++i) { From 3dbb0a12fb433b29107d449099efbc99f5d71f34 Mon Sep 17 00:00:00 2001 From: unashi Date: Wed, 6 Mar 2024 16:15:37 +0800 Subject: [PATCH 125/985] [fix] style --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++---- tests/integration/helpers/cluster.py | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d8680958c21..c76ffeee874 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7161,11 +7161,10 @@ std::pair MergeTreeData::cloneAn try { auto reservation_space = src_part_storage->reserve(src_part->getBytesOnDisk()); - if (!reservation_space) { + if (!reservation_space) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on disk."); - } - dst_part_storage - = src_part_storage->clonePart(this->getRelativeDataPath(), tmp_dst_part_name, disk, read_settings, write_settings, {}, {}); + dst_part_storage = src_part_storage->clonePart( + this->getRelativeDataPath(), tmp_dst_part_name, disk, read_settings, write_settings, {}, {}); copy_successful = true; break; } diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 1d96563251b..767ba5b6660 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3216,7 +3216,7 @@ services: - timeout:1 - inet6 - rotate - {networks} + {123} {app_net} {ipv4_address} {ipv6_address} From b4dba828a4dcde93944e05b512818827fd3e5a85 Mon Sep 17 00:00:00 2001 From: unashi Date: Wed, 6 Mar 2024 16:19:07 +0800 Subject: [PATCH 126/985] [fix] --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 767ba5b6660..1d96563251b 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3216,7 +3216,7 @@ services: - timeout:1 - inet6 - rotate - {123} + {networks} {app_net} {ipv4_address} {ipv6_address} From 6d4514c045cc565919f9c8384710eee89354f0f3 Mon Sep 17 00:00:00 2001 
From: kssenii Date: Wed, 6 Mar 2024 16:55:48 +0800 Subject: [PATCH 127/985] Fix test --- src/Storages/System/StorageSystemDisks.cpp | 10 +++++++++- tests/integration/test_backup_restore_s3/test.py | 12 ++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 30d64156b22..0f8a6640f2c 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -25,6 +25,8 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"unreserved_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, {"type", std::make_shared()}, + {"object_storage_type", std::make_shared()}, + {"metadata_type", std::make_shared()}, {"is_encrypted", std::make_shared()}, {"is_read_only", std::make_shared()}, {"is_write_once", std::make_shared()}, @@ -53,6 +55,8 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_unreserved = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); + MutableColumnPtr col_object_storage_type = ColumnString::create(); + MutableColumnPtr col_metadata_type = ColumnString::create(); MutableColumnPtr col_is_encrypted = ColumnUInt8::create(); MutableColumnPtr col_is_read_only = ColumnUInt8::create(); MutableColumnPtr col_is_write_once = ColumnUInt8::create(); @@ -69,7 +73,9 @@ Pipe StorageSystemDisks::read( col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(data_source_description.toString()); + col_type->insert(data_source_description.type); + col_object_storage_type->insert(data_source_description.object_storage_type); + col_metadata_type->insert(data_source_description.metadata_type); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); @@ -91,6 +97,8 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_unreserved)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); + res_columns.emplace_back(std::move(col_object_storage_type)); + res_columns.emplace_back(std::move(col_metadata_type)); res_columns.emplace_back(std::move(col_is_encrypted)); res_columns.emplace_back(std::move(col_is_read_only)); res_columns.emplace_back(std::move(col_is_write_once)); diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 4d3ee8200a3..95e264107e4 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -124,15 +124,15 @@ def check_backup_and_restore( def check_system_tables(backup_query_id=None): disks = [ tuple(disk.split("\t")) - for disk in node.query("SELECT name, type FROM system.disks").split("\n") + for disk in node.query("SELECT name, type, object_storage_type, metadata_type FROM system.disks").split("\n") if disk ] expected_disks = ( - ("default", "local"), - ("disk_s3", "s3"), - ("disk_s3_cache", "s3"), - ("disk_s3_other_bucket", "s3"), - ("disk_s3_plain", "s3_plain"), + ("default", "local", "", ""), + ("disk_s3", "object_storage", "s3", "local"), + ("disk_s3_cache", "object_storage", "s3", "local"), + ("disk_s3_other_bucket", "object_storage", "s3", "local"), + 
("disk_s3_plain", "object_storage", "s3", "plain"), ) assert len(expected_disks) == len(disks) for expected_disk in expected_disks: From be98c95f586762cdf20a6375917e30f296175593 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 6 Mar 2024 09:12:26 +0000 Subject: [PATCH 128/985] Automatic style fix --- tests/integration/test_backup_restore_s3/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 95e264107e4..452a9143067 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -124,7 +124,9 @@ def check_backup_and_restore( def check_system_tables(backup_query_id=None): disks = [ tuple(disk.split("\t")) - for disk in node.query("SELECT name, type, object_storage_type, metadata_type FROM system.disks").split("\n") + for disk in node.query( + "SELECT name, type, object_storage_type, metadata_type FROM system.disks" + ).split("\n") if disk ] expected_disks = ( From 8e413da8f156ab03c875b9525044265cffcc5b83 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 6 Mar 2024 17:32:08 +0800 Subject: [PATCH 129/985] apply opts for string nested in array --- src/Columns/ColumnArray.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 389b3e97820..44b17c89ae1 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -536,6 +536,8 @@ void ColumnArray::insertManyFromImpl(const ColumnArray & src, size_t position, s return insertManyFromNumber(src, position, length); if (typeid_cast *>(data.get())) return insertManyFromNumber(src, position, length); + if (typeid_cast(data.get())) + return insertManyFromString(src, position, length); if (typeid_cast(data.get())) return insertManyFromNullable(src, position, length); if (typeid_cast(data.get())) From 56fb61e1866e81e9a00b9b98299ddc56a54f5394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 10:53:39 +0000 Subject: [PATCH 130/985] Do not duplicate the first category in case of multiple categories in `FunctionDocumentation` --- src/Common/FunctionDocumentation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/FunctionDocumentation.cpp b/src/Common/FunctionDocumentation.cpp index 2aad23b90b7..0dc5b48f9d1 100644 --- a/src/Common/FunctionDocumentation.cpp +++ b/src/Common/FunctionDocumentation.cpp @@ -36,6 +36,7 @@ std::string FunctionDocumentation::categoriesAsString() const auto it = categories.begin(); std::string res = *it; + ++it; for (; it != categories.end(); ++it) res += ", " + *it; return res; From 6f726865baf3fea606e7ff46e5d8cd98bda94f5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 11:10:02 +0000 Subject: [PATCH 131/985] Add inline docs to functions --- src/Functions/sin.cpp | 10 +++++- src/Functions/visitParamExtractBool.cpp | 30 +++++++++++++++++- src/Functions/visitParamExtractFloat.cpp | 31 ++++++++++++++++++- src/Functions/visitParamExtractInt.cpp | 31 ++++++++++++++++++- src/Functions/visitParamExtractRaw.cpp | 30 +++++++++++++++++- src/Functions/visitParamExtractString.cpp | 30 +++++++++++++++++- src/Functions/visitParamExtractUInt.cpp | 31 ++++++++++++++++++- src/Functions/visitParamHas.cpp | 23 +++++++++++++- ...new_functions_must_be_documented.reference | 8 ----- 9 files changed, 208 insertions(+), 16 deletions(-) diff --git 
a/src/Functions/sin.cpp b/src/Functions/sin.cpp index dc75f4800c0..914f431adb4 100644 --- a/src/Functions/sin.cpp +++ b/src/Functions/sin.cpp @@ -13,7 +13,15 @@ using FunctionSin = FunctionMathUnary>; REGISTER_FUNCTION(Sin) { - factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction( + FunctionDocumentation{ + .description = "Returns the sine of the argument.", + .syntax = "sin(x)", + .arguments = {{"x", "The number whose sine will be returned. (U)Int*, Float* or Decimal*."}}, + .returned_value = "The sine of x.", + .examples = {{.name = "simple", .query = "SELECT sin(1.23)", .result = "0.9424888019316975"}}, + .categories{"Mathematical", "Trigonometric"}}, + FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/visitParamExtractBool.cpp b/src/Functions/visitParamExtractBool.cpp index 31763fe54ce..2c413ec13bb 100644 --- a/src/Functions/visitParamExtractBool.cpp +++ b/src/Functions/visitParamExtractBool.cpp @@ -21,7 +21,35 @@ using FunctionSimpleJSONExtractBool = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description = "Parses a true/false value from the value of the field named field_name. The result is UInt8.", + .syntax = "simpleJSONExtractBool(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value + = R"(It returns 1 if the value of the field is true, 0 otherwise. This means this function will return 0 including (and not only) in the following cases: + - If the field doesn't exists. + - If the field contains true as a string, e.g.: {"field":"true"}. + - If the field contains 1 as a numerical value.)", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":false,"bar":true}'); +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json; +SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +1 +0 +0)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractBool", "simpleJSONExtractBool"); } diff --git a/src/Functions/visitParamExtractFloat.cpp b/src/Functions/visitParamExtractFloat.cpp index 6f6d5274050..fc839142cc7 100644 --- a/src/Functions/visitParamExtractFloat.cpp +++ b/src/Functions/visitParamExtractFloat.cpp @@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractFloat = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses Float64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractFloat(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4000 +0 +-3.4 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractFloat", "simpleJSONExtractFloat"); } diff --git a/src/Functions/visitParamExtractInt.cpp b/src/Functions/visitParamExtractInt.cpp index e020c43e8b4..4588fc55c52 100644 --- a/src/Functions/visitParamExtractInt.cpp +++ b/src/Functions/visitParamExtractInt.cpp @@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractInt = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses Int64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractInt(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4 +0 +-3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractInt", "simpleJSONExtractInt"); } diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp index 74a83170545..296429423fe 100644 --- a/src/Functions/visitParamExtractRaw.cpp +++ b/src/Functions/visitParamExtractRaw.cpp @@ -61,7 +61,35 @@ using FunctionSimpleJSONExtractRaw = FunctionsStringSearchToString(); + factory.registerFunction(FunctionDocumentation{ + .description = "Returns the value of the field named field_name as a String, including separators.", + .syntax = "simpleJSONExtractRaw(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value + = "It returns the value of the field as a String including separators if the field exists, or an emtpy String otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"( +"-4e3" +-3.4 +5 +{"def":[1,2,3]})"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractRaw", "simpleJSONExtractRaw"); } diff --git a/src/Functions/visitParamExtractString.cpp b/src/Functions/visitParamExtractString.cpp index 50d5f345189..8dae10638f8 100644 --- a/src/Functions/visitParamExtractString.cpp +++ b/src/Functions/visitParamExtractString.cpp @@ -22,7 +22,35 @@ using FunctionSimpleJSONExtractString = FunctionsStringSearchToString(); + factory.registerFunction(FunctionDocumentation{ + .description = R"(Parses String in double quotes from the value of the field named field_name. + + There is currently no support for code points in the format \uXXXX\uYYYY that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).)", + .syntax = "simpleJSONExtractString(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the value of a field as a String, including separators. The value is unescaped. It returns an empty " + "String: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263"}'); +INSERT INTO jsons VALUES ('{"foo":"\\u263a"}'); +INSERT INTO jsons VALUES ('{"foo":"hello}'); + +SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(\n\0 + +☺ +)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractString", "simpleJSONExtractString"); } diff --git a/src/Functions/visitParamExtractUInt.cpp b/src/Functions/visitParamExtractUInt.cpp index fb58e417f34..777df9fdd24 100644 --- a/src/Functions/visitParamExtractUInt.cpp +++ b/src/Functions/visitParamExtractUInt.cpp @@ -12,7 +12,36 @@ using FunctionSimpleJSONExtractUInt = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses UInt64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractUInt(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +4 +0 +3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractUInt", "simpleJSONExtractUInt"); } diff --git a/src/Functions/visitParamHas.cpp b/src/Functions/visitParamHas.cpp index 1ed1f1d16e7..09fec782980 100644 --- a/src/Functions/visitParamHas.cpp +++ b/src/Functions/visitParamHas.cpp @@ -21,7 +21,28 @@ using FunctionSimpleJSONHas = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description = "Checks whether there is a field named field_name. The result is UInt8.", + .syntax = "simpleJSONHas(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns 1 if the field exists, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons;)", + .result = R"(1 +0)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamHas", "simpleJSONHas"); } diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 379eea4dbbb..0a11e8b5034 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -643,14 +643,6 @@ shardNum showCertificate sigmoid sign -simpleJSONExtractBool -simpleJSONExtractFloat -simpleJSONExtractInt -simpleJSONExtractRaw -simpleJSONExtractString -simpleJSONExtractUInt -simpleJSONHas -sin sinh sipHash128 sipHash128Keyed From 2dc1721262c9f483917750aaa6139ff7409e02dc Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Mar 2024 11:53:00 +0000 Subject: [PATCH 132/985] Refactorings for consistency --- src/Functions/array/arrayDistance.cpp | 54 +++++++-------- src/Functions/array/arrayDotProduct.cpp | 91 +++++++++++++------------ 2 files changed, 73 insertions(+), 72 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 71564f6fa93..6b72c99d829 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -379,17 +379,17 @@ public: } -#define SUPPORTED_TYPES(action) \ - action(UInt8) \ - action(UInt16) \ - action(UInt32) \ - action(UInt64) \ - action(Int8) \ - action(Int16) \ - action(Int32) \ - action(Int64) \ - action(Float32) \ - action(Float64) +#define SUPPORTED_TYPES(ACTION) \ + ACTION(UInt8) \ + ACTION(UInt16) \ + ACTION(UInt32) \ + ACTION(UInt64) \ + ACTION(Int8) \ + ACTION(Int16) \ + ACTION(Int32) \ + ACTION(Int64) \ + ACTION(Float32) \ + ACTION(Float64) private: @@ -398,12 +398,11 @@ private: { DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); - /// Dynamic 
disaptch based on the 1st argument type switch (type_x->getTypeId()) { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithFirstType(arguments, input_rows_count); \ + return executeWithResultTypeAndLeftType(arguments, input_rows_count); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -419,17 +418,16 @@ private: } } - template - ColumnPtr executeWithFirstType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const + template + ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const { DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); - /// Dynamic disaptch based on the 2nd argument type switch (type_y->getTypeId()) { #define ON_TYPE(type) \ case TypeIndex::type: \ - return executeWithTypes(arguments[0].column, arguments[1].column, input_rows_count, arguments); \ + return executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column, input_rows_count, arguments); \ break; SUPPORTED_TYPES(ON_TYPE) @@ -445,16 +443,16 @@ private: } } - template - ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const + template + ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { if (typeid_cast(col_x.get())) { - return executeWithTypesFirstArgConst(col_x, col_y, input_rows_count, arguments); + return executeWithLeftArgConst(col_x, col_y, input_rows_count, arguments); } else if (typeid_cast(col_y.get())) { - return executeWithTypesFirstArgConst(col_y, col_x, input_rows_count, arguments); + return executeWithLeftArgConst(col_y, col_x, input_rows_count, arguments); } col_x = col_x->convertToFullColumnIfConst(); @@ -463,8 +461,8 @@ private: const auto & array_x = *assert_cast(col_x.get()); const auto & array_y = *assert_cast(col_y.get()); - const auto & data_x = typeid_cast &>(array_x.getData()).getData(); - const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + const auto & data_x = typeid_cast &>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); const auto & offsets_x = array_x.getOffsets(); const auto & offsets_y = array_y.getOffsets(); @@ -521,8 +519,8 @@ private: } /// Special case when the 1st parameter is Const - template - ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const + template + ColumnPtr executeWithLeftArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const { col_x = assert_cast(col_x.get())->getDataColumnPtr(); col_y = col_y->convertToFullColumnIfConst(); @@ -530,8 +528,8 @@ private: const auto & array_x = *assert_cast(col_x.get()); const auto & array_y = *assert_cast(col_y.get()); - const auto & data_x = typeid_cast &>(array_x.getData()).getData(); - const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + const auto & data_x = typeid_cast &>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); const auto & offsets_x = array_x.getOffsets(); const auto & offsets_y = array_y.getOffsets(); @@ -574,7 +572,7 @@ private: /// - the two most common metrics L2 and cosine distance, /// - the most powerful SIMD instruction set (AVX-512F). 
#if USE_MULTITARGET_CODE - if constexpr (std::is_same_v && std::is_same_v) /// ResultType is Float32 or Float64 + if constexpr (std::is_same_v && std::is_same_v) /// ResultType is Float32 or Float64 { if constexpr (std::is_same_v || std::is_same_v) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 6c615a058c3..548c79c567f 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -163,26 +163,29 @@ public: return Kernel::getReturnType(nested_types[0], nested_types[1]); } +#define SUPPORTED_TYPES(ACTION) \ + ACTION(UInt8) \ + ACTION(UInt16) \ + ACTION(UInt32) \ + ACTION(UInt64) \ + ACTION(Int8) \ + ACTION(Int16) \ + ACTION(Int32) \ + ACTION(Int64) \ + ACTION(Float32) \ + ACTION(Float64) + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { switch (result_type->getTypeId()) { - #define SUPPORTED_TYPE(type) \ + #define ON_TYPE(type) \ case TypeIndex::type: \ return executeWithResultType(arguments); \ break; - SUPPORTED_TYPE(UInt8) - SUPPORTED_TYPE(UInt16) - SUPPORTED_TYPE(UInt32) - SUPPORTED_TYPE(UInt64) - SUPPORTED_TYPE(Int8) - SUPPORTED_TYPE(Int16) - SUPPORTED_TYPE(Int32) - SUPPORTED_TYPE(Int64) - SUPPORTED_TYPE(Float32) - SUPPORTED_TYPE(Float64) - #undef SUPPORTED_TYPE + SUPPORTED_TYPES(ON_TYPE) + #undef ON_TYPE default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName()); @@ -194,16 +197,16 @@ private: ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const { ColumnPtr res; - if (!((res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)) - || (res = executeWithResultTypeAndLeft(arguments)))) + if (!((res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)) + || (res = executeWithResultTypeAndLeftType(arguments)))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); @@ -211,43 +214,43 @@ private: } template - ColumnPtr executeWithResultTypeAndLeft(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments) const { ColumnPtr res; - if ( (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = 
executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments)) - || (res = executeWithResultTypeAndLeftAndRight(arguments))) + if ( (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) + || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column))) return res; return nullptr; } template - ColumnPtr executeWithResultTypeAndLeftAndRight(const ColumnsWithTypeAndName & arguments) const + ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y) const { - ColumnPtr col_left = arguments[0].column->convertToFullColumnIfConst(); - ColumnPtr col_right = arguments[1].column->convertToFullColumnIfConst(); - if (!col_left || !col_right) + col_x = col_x->convertToFullColumnIfConst(); + col_y = col_y->convertToFullColumnIfConst(); + if (!col_x || !col_y) return nullptr; - const ColumnArray * col_arr_left = checkAndGetColumn(col_left.get()); - const ColumnArray * cokl_arr_right = checkAndGetColumn(col_right.get()); - if (!col_arr_left || !cokl_arr_right) + const ColumnArray * array_x = checkAndGetColumn(col_x.get()); + const ColumnArray * array_y = checkAndGetColumn(col_y.get()); + if (!array_x || !array_y) return nullptr; - const ColumnVector * col_arr_nested_left = checkAndGetColumn>(col_arr_left->getData()); - const ColumnVector * col_arr_nested_right = checkAndGetColumn>(cokl_arr_right->getData()); + const ColumnVector * col_arr_nested_left = checkAndGetColumn>(array_x->getData()); + const ColumnVector * col_arr_nested_right = checkAndGetColumn>(array_y->getData()); if (!col_arr_nested_left || !col_arr_nested_right) return nullptr; - if (!col_arr_left->hasEqualOffsets(*cokl_arr_right)) + if (!array_x->hasEqualOffsets(*array_y)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); auto col_res = ColumnVector::create(); @@ -255,7 +258,7 @@ private: vector( col_arr_nested_left->getData(), col_arr_nested_right->getData(), - col_arr_left->getOffsets(), + array_x->getOffsets(), col_res->getData()); return col_res; From 076482e8bd503ca352695173d87e9b48228389f0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 6 Mar 2024 14:04:09 +0100 Subject: [PATCH 133/985] Remove whitespaces --- src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 42c6e63da01..e0d0fda81cb 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ 
b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -132,7 +132,7 @@ class OptimizeShardingKeyRewriteIn : public InDepthQueryTreeVisitorWithContext; - + OptimizeShardingKeyRewriteIn(OptimizeShardingKeyRewriteInVisitor::Data data_, ContextPtr context) : Base(std::move(context)) , data(std::move(data_)) From 7065e650e1d007be4659ddb1f070b48e19cdef55 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 6 Mar 2024 14:34:03 +0100 Subject: [PATCH 134/985] Code cleanup --- src/DataTypes/ObjectUtils.cpp | 24 ++------ src/DataTypes/ObjectUtils.h | 2 - .../ClusterProxy/SelectStreamFactory.cpp | 7 +-- .../ClusterProxy/SelectStreamFactory.h | 2 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 61 +++---------------- src/Storages/StorageDistributed.cpp | 1 - tests/analyzer_integration_broken_tests.txt | 1 - .../test_distributed_type_object/test.py | 3 +- 8 files changed, 18 insertions(+), 83 deletions(-) diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 47d8c5c9113..ccfa0a28f13 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -1,4 +1,8 @@ #include +#include +#include +#include +#include #include #include #include @@ -21,16 +25,6 @@ #include #include #include -#include "Analyzer/ConstantNode.h" -#include "Analyzer/FunctionNode.h" -#include "Analyzer/IQueryTreeNode.h" -#include "Analyzer/Identifier.h" -#include "Analyzer/IdentifierNode.h" -#include "Analyzer/QueryNode.h" -#include "Analyzer/Utils.h" -#include -#include -#include "Common/logger_useful.h" namespace DB @@ -991,22 +985,12 @@ MissingObjectList replaceMissedSubcolumnsByConstants( { auto constant = std::make_shared(type->getDefault(), type); constant->setAlias(table_expression->getAlias() + "." + name); - // auto materialize = std::make_shared("materialize"); - - // auto function = FunctionFactory::instance().get("materialize", context); - // materialize->getArguments().getNodes() = { constant }; - // materialize->resolveAsFunction(function->build(materialize->getArgumentColumns())); - // materialize->setAlias(name); column_name_to_node[name] = buildCastFunction(constant, type, context); missed_list.push_back({ constant->getValueStringRepresentation() + "_" + constant->getResultType()->getName(), table_expression->getAlias() + "." 
+ name }); - LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "{} -> {}", missed_list.back().first, missed_list.back().second); - LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Name {} Expression\n{}", name, column_name_to_node[name]->dumpTree()); } - LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Table expression\n{} ", table_expression->dumpTree()); replaceColumns(query, table_expression, column_name_to_node); - LOG_DEBUG(&Poco::Logger::get("replaceMissedSubcolumnsByConstants"), "Result:\n{} ", query->dumpTree()); return missed_list; } diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 013e525832e..6ef19baf5ae 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -3,8 +3,6 @@ #include #include #include -#include "Analyzer/IQueryTreeNode.h" -#include "Interpreters/Context_fwd.h" #include #include #include diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 5bcd1ce68cb..4fccd83c8c0 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -5,10 +5,9 @@ #include #include #include -#include "Analyzer/IQueryTreeNode.h" -#include "Interpreters/InterpreterSelectQueryAnalyzer.h" -#include "Interpreters/SelectQueryOptions.h" -#include "Planner/Utils.h" +#include +#include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index bee7edb3c19..61694830b3d 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include "Analyzer/IQueryTreeNode.h" namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index ac507c6d555..72848a37f6e 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -15,7 +14,6 @@ #include #include #include -#include "DataTypes/ObjectUtils.h" #include #include #include @@ -33,54 +31,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -static void addRenamingActions(Pipe & pipe, const MissingObjectList & missed_list, const Block & output_header) -{ - if (missed_list.empty()) - return; - - const auto & output_columns = output_header.getColumnsWithTypeAndName(); - std::vector indexes; - for (size_t i = 0; i < output_columns.size(); ++i) - { - bool found = false; - for (auto const & elem : missed_list) - { - if (output_columns[i].name.contains(elem.second)) - { - found = true; - break; - } - } - if (found) - indexes.push_back(i); - } - - auto dag = std::make_shared(pipe.getHeader().getColumnsWithTypeAndName()); - - for (size_t index : indexes) - { - dag->addOrReplaceInOutputs(dag->addAlias(*dag->getOutputs()[index], output_header.getByPosition(index).name)); - } - - // dag->addAliases(rename_to_apply); - - auto convert_actions = std::make_shared(dag); - pipe.addSimpleTransform([&](const Block & cur_header, Pipe::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header, convert_actions); - }); - - LOG_DEBUG(&Poco::Logger::get("addRenamingActions"), "EXPECTED:\n{}", output_header.dumpStructure()); - - LOG_DEBUG(&Poco::Logger::get("addRenamingActions"), "{}", 
pipe.getHeader().dumpStructure()); -} - -static void addConvertingActions(Pipe & pipe, const Block & header) +static void addConvertingActions(Pipe & pipe, const Block & header, bool use_positions_to_match = false) { if (blocksHaveEqualStructure(pipe.getHeader(), header)) return; - auto get_converting_dag = [](const Block & block_, const Block & header_) + auto match_mode = use_positions_to_match ? ActionsDAG::MatchColumnsMode::Position : ActionsDAG::MatchColumnsMode::Name; + + auto get_converting_dag = [mode = match_mode](const Block & block_, const Block & header_) { /// Convert header structure to expected. /// Also we ignore constants from result and replace it with constants from header. @@ -88,7 +46,7 @@ static void addConvertingActions(Pipe & pipe, const Block & header) return ActionsDAG::makeConvertingActions( block_.getColumnsWithTypeAndName(), header_.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Name, + mode, true); }; @@ -260,8 +218,7 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream }; pipes.emplace_back(createDelayedPipe(shard.header, lazily_create_stream, add_totals, add_extremes)); - addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); - addConvertingActions(pipes.back(), output_stream->header); + addConvertingActions(pipes.back(), output_stream->header, !shard.missing_object_list.empty()); } void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFactory::Shard & shard) @@ -342,8 +299,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); - addConvertingActions(pipes.back(), output_stream->header); + addConvertingActions(pipes.back(), output_stream->header, !shard.missing_object_list.empty()); } } else @@ -372,8 +328,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact pipes.emplace_back( createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); - addRenamingActions(pipes.back(), shard.missing_object_list, output_stream->header); - addConvertingActions(pipes.back(), output_stream->header); + addConvertingActions(pipes.back(), output_stream->header, !shard.missing_object_list.empty()); } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 34ab21a4751..726f1788115 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -30,7 +30,6 @@ #include #include #include -#include "Analyzer/IQueryTreeNode.h" #include #include diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 796ca6bca22..a7954f91efa 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,6 +1,5 @@ test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups -test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster diff --git a/tests/integration/test_distributed_type_object/test.py 
b/tests/integration/test_distributed_type_object/test.py index 7e6c000cb8e..360087c9dda 100644 --- a/tests/integration/test_distributed_type_object/test.py +++ b/tests/integration/test_distributed_type_object/test.py @@ -85,10 +85,11 @@ def test_distributed_type_object(started_cluster): 3\t\t\t\tfoo""" ) + # The following query is not supported by analyzer now assert ( TSV( node1.query( - "SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id" + "SELECT id, data.k1, data.k2.k3, data.k2.k4, data.k5 FROM dist_table ORDER BY id SETTINGS allow_experimental_analyzer = 0" ) ) == expected From c3909743ed95adf5efd16e69f353aab8af73978b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Mar 2024 12:36:00 +0000 Subject: [PATCH 135/985] Remove repeated unnecessary unpacking of const columns --- src/Functions/array/arrayDotProduct.cpp | 88 +++++++++++++------------ 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 548c79c567f..c27170cd35b 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -19,7 +19,6 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; } @@ -196,40 +195,51 @@ private: template ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const { - ColumnPtr res; - if (!((res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)) - || (res = executeWithResultTypeAndLeftType(arguments)))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); - return res; + switch (type_x->getTypeId()) + { +#define ON_TYPE(type) \ + case TypeIndex::type: \ + return executeWithResultTypeAndLeftType(arguments); \ + break; + + SUPPORTED_TYPES(ON_TYPE) +#undef ON_TYPE + + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} has nested type {}. 
" + "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), + type_x->getName()); + } } template ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments) const { - ColumnPtr res; - if ( (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column)) - || (res = executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column))) - return res; + DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); - return nullptr; + switch (type_y->getTypeId()) + { + #define ON_TYPE(type) \ + case TypeIndex::type: \ + return executeWithResultTypeAndLeftTypeAndRightType(arguments[0].column, arguments[1].column); \ + break; + + SUPPORTED_TYPES(ON_TYPE) + #undef ON_TYPE + + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} has nested type {}. " + "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), + type_y->getName()); + } } template @@ -237,28 +247,22 @@ private: { col_x = col_x->convertToFullColumnIfConst(); col_y = col_y->convertToFullColumnIfConst(); - if (!col_x || !col_y) - return nullptr; - const ColumnArray * array_x = checkAndGetColumn(col_x.get()); - const ColumnArray * array_y = checkAndGetColumn(col_y.get()); - if (!array_x || !array_y) - return nullptr; + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); - const ColumnVector * col_arr_nested_left = checkAndGetColumn>(array_x->getData()); - const ColumnVector * col_arr_nested_right = checkAndGetColumn>(array_y->getData()); - if (!col_arr_nested_left || !col_arr_nested_right) - return nullptr; + const auto & data_x = typeid_cast &>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); - if (!array_x->hasEqualOffsets(*array_y)) + if (!array_x.hasEqualOffsets(array_y)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); auto col_res = ColumnVector::create(); vector( - col_arr_nested_left->getData(), - col_arr_nested_right->getData(), - array_x->getOffsets(), + data_x, + data_y, + array_x.getOffsets(), col_res->getData()); return col_res; From 5b94f9b4115e3b7e03118b4a4f4999139e58511e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Mar 2024 15:31:19 +0100 Subject: [PATCH 136/985] Check children first --- src/Storages/VirtualColumnUtils.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 6d66453442e..e8441b96782 
100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -469,18 +469,18 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { - if (node->type != ActionsDAG::ActionType::FUNCTION) - return true; - - if (!node->function_base->isDeterministicInScopeOfQuery()) - return false; - for (const auto * child : node->children) { if (!isDeterministicInScopeOfQuery(child)) return false; } + if (node->type != ActionsDAG::ActionType::FUNCTION) + return true; + + if (!node->function_base->isDeterministicInScopeOfQuery()) + return false; + return true; } From a69bcc29f5bb0bdaca1757673dac1574c97b1e2f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Mar 2024 14:53:11 +0000 Subject: [PATCH 137/985] Refactor a sanity check --- src/Functions/array/arrayDistance.cpp | 20 +++---------------- src/Functions/array/arrayDotProduct.cpp | 8 +++++--- .../queries/0_stateless/02708_dotProduct.sql | 2 +- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index 6b72c99d829..c9e6e97749f 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -18,11 +18,11 @@ namespace DB { namespace ErrorCodes { + extern const int ARGUMENT_OUT_OF_BOUND; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DONT_MATCH; - extern const int ARGUMENT_OUT_OF_BOUND; } struct L1Distance @@ -465,22 +465,9 @@ private: const auto & data_y = typeid_cast &>(array_y.getData()).getData(); const auto & offsets_x = array_x.getOffsets(); - const auto & offsets_y = array_y.getOffsets(); - /// Check that arrays in both columns are the sames size - for (size_t row = 0; row < offsets_x.size(); ++row) - { - if (offsets_x[row] != offsets_y[row]) [[unlikely]] - { - ColumnArray::Offset prev_offset = row > 0 ? 
offsets_x[row] : 0; - throw Exception( - ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, - "Arguments of function {} have different array sizes: {} and {}", - getName(), - offsets_x[row] - prev_offset, - offsets_y[row] - prev_offset); - } - } + if (!array_x.hasEqualOffsets(array_y)) + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName()); const typename Kernel::ConstParams kernel_params = initConstParams(arguments); @@ -534,7 +521,6 @@ private: const auto & offsets_x = array_x.getOffsets(); const auto & offsets_y = array_y.getOffsets(); - /// Check that arrays in both columns are the sames size ColumnArray::Offset prev_offset = 0; for (size_t row : collections::range(0, offsets_y.size())) { diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index c27170cd35b..3abd1a6c6db 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -18,9 +18,9 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; + extern const int SIZES_OF_ARRAYS_DONT_MATCH; } @@ -254,15 +254,17 @@ private: const auto & data_x = typeid_cast &>(array_x.getData()).getData(); const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + const auto & offsets_x = array_x.getOffsets(); + if (!array_x.hasEqualOffsets(array_y)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName()); auto col_res = ColumnVector::create(); vector( data_x, data_y, - array_x.getOffsets(), + offsets_x, col_res->getData()); return col_res; diff --git a/tests/queries/0_stateless/02708_dotProduct.sql b/tests/queries/0_stateless/02708_dotProduct.sql index 6ad615664e8..2035e23cf1d 100644 --- a/tests/queries/0_stateless/02708_dotProduct.sql +++ b/tests/queries/0_stateless/02708_dotProduct.sql @@ -4,7 +4,7 @@ SELECT arrayDotProduct([1, 2]); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATC SELECT arrayDotProduct([1, 2], 'abc'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayDotProduct('abc', [1, 2]); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT arrayDotProduct([1, 2], ['abc', 'def']); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT arrayDotProduct([1, 2], [3, 4, 5]); -- { serverError BAD_ARGUMENTS } +SELECT arrayDotProduct([1, 2], [3, 4, 5]); -- { serverError SIZES_OF_ARRAYS_DONT_MATCH } SELECT dotProduct([1, 2], (3, 4, 5)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT '-- Tests'; From 526f162082dfbb4ad2fb5d3d807dfd2ad9b54bdd Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 18:20:47 +0000 Subject: [PATCH 138/985] Fix logical error on bad compatibility setting value type --- src/Core/Settings.cpp | 4 ++++ .../03003_compatibility_setting_bad_value.reference | 0 .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference create mode 100644 tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index a38197b9eeb..fb456b46d89 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -114,7 +114,11 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, 
const Field & value) { if (name == "compatibility") + { + if (value.getType() != Field::Types::Which::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName()); applyCompatibilitySetting(value.get()); + } /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, /// otherwise the next time we will change compatibility setting diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql new file mode 100644 index 00000000000..9a6f4e7944a --- /dev/null +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -0,0 +1,2 @@ +select 42 settings compatibility=NULL; -- {clientError BAD_GET} + From bdb76d9dd4b42ab4f40db0d371165665171afb4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 16:30:22 +0000 Subject: [PATCH 139/985] Fix aspell errors --- docs/en/sql-reference/functions/json-functions.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 246cb8972fb..e920ab82988 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -264,7 +264,7 @@ simpleJSONExtractRaw(json, field_name) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an emtpy `String` otherwise. +It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. 
**Example** diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3614bcb7452..917b2cdcc71 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2724 +personal_ws-1.1 en 2758 AArch ACLs ALTERs @@ -843,7 +843,6 @@ SendScalars ShareAlike SharedMergeTree Shortkeys -Shortkeys SimHash Simhash SimpleAggregateFunction @@ -1703,7 +1702,6 @@ hyperscan hypot hyvor iTerm -iTerm icosahedron icudata idempotency @@ -2327,6 +2325,14 @@ shortcircuit shortkeys shoutout simdjson +simpleJSON +simpleJSONExtractBool +simpleJSONExtractFloat +simpleJSONExtractInt +simpleJSONExtractRaw +simpleJSONExtractString +simpleJSONExtractUInt +simpleJSONHas simpleLinearRegression simpleaggregatefunction simplelinearregression From 77a980373a1dab7c49e5713ba7050d218c1250c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 16:31:27 +0000 Subject: [PATCH 140/985] Fix typo in inline doc --- src/Functions/visitParamExtractRaw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp index 296429423fe..3cdc5001e13 100644 --- a/src/Functions/visitParamExtractRaw.cpp +++ b/src/Functions/visitParamExtractRaw.cpp @@ -68,7 +68,7 @@ REGISTER_FUNCTION(VisitParamExtractRaw) = {{"json", "The JSON in which the field is searched for. String."}, {"field_name", "The name of the field to search for. String literal."}}, .returned_value - = "It returns the value of the field as a String including separators if the field exists, or an emtpy String otherwise.", + = "It returns the value of the field as a String including separators if the field exists, or an empty String otherwise.", .examples = {{.name = "simple", .query = R"(CREATE TABLE jsons From d9b5f9a086d8bc1af5387edee77d0e3fdbf4d9b0 Mon Sep 17 00:00:00 2001 From: Nikolay Monkov Date: Wed, 6 Mar 2024 21:37:56 +0500 Subject: [PATCH 141/985] version has been added to docker labels --- tests/ci/docker_server.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 38d0ea6d86b..803dbfcd92a 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -216,11 +216,12 @@ def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]: return tags -def buildx_args(urls: Dict[str, str], arch: str, direct_urls: List[str]) -> List[str]: +def buildx_args(urls: Dict[str, str], arch: str, direct_urls: List[str], version: str) -> List[str]: args = [ f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}", f"--label=com.clickhouse.build.githash={git.sha}", + f"--label=com.clickhouse.build.version={version}", ] if direct_urls: args.append(f"--build-arg=DIRECT_DOWNLOAD_URLS='{' '.join(direct_urls)}'") @@ -267,7 +268,7 @@ def build_and_push_image( urls = [url for url in direct_urls[arch] if ".deb" in url] else: urls = [url for url in direct_urls[arch] if ".tgz" in url] - cmd_args.extend(buildx_args(repo_urls, arch, direct_urls=urls)) + cmd_args.extend(buildx_args(repo_urls, arch, direct_urls=urls, version=version.describe)) if not push: cmd_args.append(f"--tag={image.repo}:{arch_tag}") cmd_args.extend( From 245ea0c186614a5a13a7e0bed79f94bc6ad46d87 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 6 Mar 2024 15:26:11 +0000 Subject: [PATCH 142/985] Implement const/non-const shortcut --- 
src/Functions/array/arrayDistance.cpp | 3 +- src/Functions/array/arrayDotProduct.cpp | 110 ++++++++++++++---- tests/performance/dotProduct.xml | 1 + .../0_stateless/02708_dotProduct.reference | 7 ++ .../queries/0_stateless/02708_dotProduct.sql | 9 ++ 5 files changed, 106 insertions(+), 24 deletions(-) diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp index c9e6e97749f..03f0bc7b286 100644 --- a/src/Functions/array/arrayDistance.cpp +++ b/src/Functions/array/arrayDistance.cpp @@ -477,6 +477,7 @@ private: /// Do the actual computation ColumnArray::Offset prev = 0; size_t row = 0; + for (auto off : offsets_x) { /// Process chunks in vectorized manner @@ -522,7 +523,7 @@ private: const auto & offsets_y = array_y.getOffsets(); ColumnArray::Offset prev_offset = 0; - for (size_t row : collections::range(0, offsets_y.size())) + for (size_t row = 0; row < offsets_y.size(); ++row) { if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]] { diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 3abd1a6c6db..f9a6687e028 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -245,6 +245,15 @@ private: template ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y) const { + if (typeid_cast(col_x.get())) + { + return executeWithLeftArgConst(col_x, col_y); + } + else if (typeid_cast(col_y.get())) + { + return executeWithLeftArgConst(col_y, col_x); + } + col_x = col_x->convertToFullColumnIfConst(); col_y = col_y->convertToFullColumnIfConst(); @@ -260,30 +269,83 @@ private: throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName()); auto col_res = ColumnVector::create(); + auto & result = col_res->getData(); - vector( - data_x, - data_y, - offsets_x, - col_res->getData()); - - return col_res; - } - - template - static void vector( - const PaddedPODArray & left, - const PaddedPODArray & right, - const ColumnArray::Offsets & offsets, - PaddedPODArray & result) - { - size_t size = offsets.size(); + size_t size = offsets_x.size(); result.resize(size); ColumnArray::Offset current_offset = 0; for (size_t row = 0; row < size; ++row) { - size_t array_size = offsets[row] - current_offset; + size_t array_size = offsets_x[row] - current_offset; + + size_t i = 0; + + /// Process chunks in vectorized manner + static constexpr size_t VEC_SIZE = 4; + typename Kernel::template State states[VEC_SIZE]; + for (; i + VEC_SIZE < array_size; i += VEC_SIZE) + { + for (size_t j = 0; j < VEC_SIZE; ++j) + Kernel::template accumulate(states[j], static_cast(data_x[current_offset + i + j]), static_cast(data_y[current_offset + i + j])); + } + + typename Kernel::template State state; + for (const auto & other_state : states) + Kernel::template combine(state, other_state); + + /// Process the tail + for (; i < array_size; ++i) + Kernel::template accumulate(state, static_cast(data_x[current_offset + i]), static_cast(data_y[current_offset + i])); + + result[row] = Kernel::template finalize(state); + + current_offset = offsets_x[row]; + } + + return col_res; + } + + template + ColumnPtr executeWithLeftArgConst(ColumnPtr col_x, ColumnPtr col_y) const + { + col_x = assert_cast(col_x.get())->getDataColumnPtr(); + col_y = col_y->convertToFullColumnIfConst(); + + const auto & array_x = *assert_cast(col_x.get()); + const auto & array_y = *assert_cast(col_y.get()); + + const auto & data_x = typeid_cast 
&>(array_x.getData()).getData(); + const auto & data_y = typeid_cast &>(array_y.getData()).getData(); + + const auto & offsets_x = array_x.getOffsets(); + const auto & offsets_y = array_y.getOffsets(); + + ColumnArray::Offset prev_offset = 0; + for (size_t row = 0; row < offsets_y.size(); ++row) + { + if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]] + { + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, + "Arguments of function {} have different array sizes: {} and {}", + getName(), + offsets_x[0], + offsets_y[row] - prev_offset); + } + prev_offset = offsets_y[row]; + } + + auto col_res = ColumnVector::create(); + auto & result = col_res->getData(); + + size_t size = offsets_y.size(); + result.resize(size); + + ColumnArray::Offset current_offset = 0; + for (size_t row = 0; row < size; ++row) + { + size_t array_size = offsets_x[0]; typename Kernel::template State state; size_t i = 0; @@ -292,13 +354,14 @@ private: /// To avoid combinatorial explosion of SIMD kernels, focus on /// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x /// 10 input types x 8 output types, + /// - const/non-const inputs instead of non-const/non-const inputs /// - the most powerful SIMD instruction set (AVX-512F). #if USE_MULTITARGET_CODE if constexpr ((std::is_same_v || std::is_same_v) && std::is_same_v && std::is_same_v) { if (isArchSupported(TargetArch::AVX512F)) - Kernel::template accumulateCombine(&left[current_offset], &right[current_offset], array_size, i, state); + Kernel::template accumulateCombine(&data_x[0], &data_y[current_offset], array_size, i, state); } #else /// Process chunks in vectorized manner @@ -307,7 +370,7 @@ private: for (; i + VEC_SIZE < array_size; i += VEC_SIZE) { for (size_t j = 0; j < VEC_SIZE; ++j) - Kernel::template accumulate(states[j], static_cast(left[i + j]), static_cast(right[i + j])); + Kernel::template accumulate(states[j], static_cast(data_x[i + j]), static_cast(data_y[current_offset + i + j])); } for (const auto & other_state : states) @@ -316,13 +379,14 @@ private: /// Process the tail for (; i < array_size; ++i) - Kernel::template accumulate(state, static_cast(left[i]), static_cast(right[i])); + Kernel::template accumulate(state, static_cast(data_x[i]), static_cast(data_y[current_offset + i])); - /// ResultType res = Kernel::template finalize(state); result[row] = Kernel::template finalize(state); - current_offset = offsets[row]; + current_offset = offsets_y[row]; } + + return col_res; } }; diff --git a/tests/performance/dotProduct.xml b/tests/performance/dotProduct.xml index 6e056964ebb..a0ab7beea9c 100644 --- a/tests/performance/dotProduct.xml +++ b/tests/performance/dotProduct.xml @@ -56,6 +56,7 @@ SELECT sum(dp) FROM (SELECT dotProduct(v, v) AS dp FROM vecs_{element_type}) + WITH (SELECT v FROM vecs_{element_type} limit 1) AS a SELECT sum(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_{element_type}) DROP TABLE vecs_{element_type} diff --git a/tests/queries/0_stateless/02708_dotProduct.reference b/tests/queries/0_stateless/02708_dotProduct.reference index 5cc9a9f0502..593071a3521 100644 --- a/tests/queries/0_stateless/02708_dotProduct.reference +++ b/tests/queries/0_stateless/02708_dotProduct.reference @@ -32,3 +32,10 @@ 32 32 32 +-- Tests that trigger special paths +0 61 +1 186 +0 62 +1 187 +0 62 +1 187 diff --git a/tests/queries/0_stateless/02708_dotProduct.sql b/tests/queries/0_stateless/02708_dotProduct.sql index 2035e23cf1d..ac94ecc28d3 100644 --- 
a/tests/queries/0_stateless/02708_dotProduct.sql +++ b/tests/queries/0_stateless/02708_dotProduct.sql @@ -45,3 +45,12 @@ SELECT '-- Aliases'; SELECT scalarProduct([1, 2, 3], [4, 5, 6]); SELECT scalarProduct((1, 2, 3), (4, 5, 6)); SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]); -- actually no alias but the internal function for arrays + +SELECT '-- Tests that trigger special paths'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab(id UInt64, vec Array(Float32)) ENGINE = MergeTree ORDER BY id; +INSERT INTO tab VALUES (0, [0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0]) (1, [5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0]); +SELECT id, arrayDotProduct(vec, vec) FROM tab ORDER BY id; -- non-const / non-const +SELECT id, arrayDotProduct([5.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0]::Array(Float32), vec) FROM tab ORDER BY id; -- const / non-const +SELECT id, arrayDotProduct([5.0, 2.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0, 3.0, 5.0, 1.0, 2.0]::Array(Float64), vec) FROM tab ORDER BY id; -- const / non-const +DROP TABLE tab; From 06a7665d0e780b52b4c1e2c1115ed41050d2d03a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:19:52 +0100 Subject: [PATCH 143/985] Update encoding-functions.md Add missing "Result:" after query --- docs/en/sql-reference/functions/encoding-functions.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index c81b3e35317..4f6da764b3c 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -469,6 +469,7 @@ Query: ```sql SELECT mortonEncode(1, 2, 3); ``` +Result: ```response 53 From 87f3c957c7dc3d16d9967723e30215e12a0b5dc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 6 Mar 2024 21:16:22 +0100 Subject: [PATCH 144/985] Blind experiment --- base/base/itoa.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/base/base/itoa.h b/base/base/itoa.h index 513070c99d9..c450090d82f 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -349,16 +349,32 @@ namespace convert template -static inline char * writeUIntText(T x, char * p) +static inline char * writeUIntText(T _x, char * p) { - static_assert(is_unsigned_v); +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wbit-int-extension" + int len = digits10(_x); + static_assert(std::is_same_v || std::is_same_v); + using T_ = std::conditional_t, unsigned __int128, unsigned _BitInt(256)>; +#pragma clang diagnostic pop - int len = digits10(x); - auto * pp = p + len; - while (x >= 100) + T_ x; + T_ hundred(100ULL); + if constexpr (std::is_same_v) { - const auto i = x % 100; - x /= 100; + x = (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); + } + else + { + x = (T_(_x.items[T::_impl::little(3)]) << 192) + (T_(_x.items[T::_impl::little(2)]) << 128) + + (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); + } + + auto * pp = p + len; + while (x >= hundred) + { + const auto i = x % hundred; + x /= hundred; pp -= 2; outTwoDigits(pp, i); } From c192d0b12532060d14934e60164df7ce771d9399 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 6 Mar 2024 22:29:41 +0100 Subject: [PATCH 145/985] impl 
--- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 83 +++++++++++++------ src/Storages/MergeTree/IMergeTreeDataPart.h | 8 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 3 +- .../MergeTree/MergeTreeDataPartCompact.h | 12 +-- .../MergeTree/MergeTreeDataPartWide.cpp | 3 +- .../MergeTree/MergeTreeDataPartWide.h | 4 +- .../MergeTree/MergeTreeIndexGranularity.cpp | 5 +- .../MergeTree/MergeTreeIndexGranularity.h | 2 + .../MergeTreeIndexGranularityInfo.cpp | 14 +++- .../MergeTree/MergeTreeIndexGranularityInfo.h | 4 + 10 files changed, 97 insertions(+), 41 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index a9bdceacef0..3922d5018c6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2,38 +2,41 @@ #include #include +#include #include -#include #include +#include +#include #include -#include +#include +#include +#include +#include #include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -1966,7 +1969,39 @@ void IMergeTreeDataPart::checkConsistencyBase() const } } -void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) const +void IMergeTreeDataPart::checkConsistency(bool require_part_metadata) const +{ + try + { + checkConsistencyBase(); + doCheckConsistency(require_part_metadata); + } + catch (Exception & e) + { + const auto part_state = fmt::format( + "state: {}, is_unexpected_local_part: {}, is_frozen: {}, is_duplicate: {}", + stateString(), + is_unexpected_local_part, + is_frozen, + is_duplicate, + is_temp); + + const auto debug_info = fmt::format( + "columns: {}, getMarkSizeInBytes: {}, getMarksCount: {}, index_granularity_info: [{}], index_granularity: [{}], " + "part_state: [{}]", + columns.toString(), + index_granularity_info.getMarkSizeInBytes(columns.size()), + index_granularity.getMarksCount(), + index_granularity_info.describe(), + index_granularity.describe(), + part_state); + + e.addMessage(debug_info); + e.rethrow(); + } +} + +void IMergeTreeDataPart::doCheckConsistency(bool /* require_part_metadata */) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'checkConsistency' is not implemented for part with type {}", getType().toString()); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 91c559d30c8..209c2d9a256 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -498,7 +498,7 @@ public: void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); /// Checks the consistency of this data part. - virtual void checkConsistency(bool require_part_metadata) const; + void checkConsistency(bool require_part_metadata) const; /// Checks the consistency of this data part, and check the consistency of its projections (if any) as well. 
void checkConsistencyWithProjections(bool require_part_metadata) const; @@ -586,8 +586,6 @@ protected: void removeIfNeeded(); - void checkConsistencyBase() const; - /// Fill each_columns_size and total_size with sizes from columns files on /// disk using columns and checksums. virtual void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const = 0; @@ -610,6 +608,8 @@ protected: void initializeIndexGranularityInfo(); + virtual void doCheckConsistency(bool require_part_metadata) const; + private: String mutable_name; mutable MergeTreeDataPartState state{MergeTreeDataPartState::Temporary}; @@ -697,6 +697,8 @@ private: void incrementStateMetric(MergeTreeDataPartState state) const; void decrementStateMetric(MergeTreeDataPartState state) const; + void checkConsistencyBase() const; + /// This ugly flag is needed for debug assertions only mutable bool part_is_probably_removed_from_disk = false; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 0ecd7abe183..5d4b602b5b8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -157,9 +157,8 @@ std::optional MergeTreeDataPartCompact::getColumnModificationTime(const return getDataPartStorage().getFileLastModified(DATA_FILE_NAME_WITH_EXTENSION).epochTime(); } -void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) const +void MergeTreeDataPartCompact::doCheckConsistency(bool require_part_metadata) const { - checkConsistencyBase(); String mrk_file_name = DATA_FILE_NAME + getMarksFileExtension(); if (!checksums.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 35a358b3720..f897bcb0bfd 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -67,14 +67,14 @@ protected: MergeTreeIndexGranularity & index_granularity_, const MergeTreeIndexGranularityInfo & index_granularity_info_, size_t columns_count, const IDataPartStorage & data_part_storage_); -private: - void checkConsistency(bool require_part_metadata) const override; + void doCheckConsistency(bool require_part_metadata) const override; - /// Loads marks index granularity into memory - void loadIndexGranularity() override; + private: + /// Loads marks index granularity into memory + void loadIndexGranularity() override; - /// Compact parts doesn't support per column size, only total size - void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; + /// Compact parts doesn't support per column size, only total size + void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index dc6c1f0019d..0111f1e7b40 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -182,9 +182,8 @@ MergeTreeDataPartWide::~MergeTreeDataPartWide() removeIfNeeded(); } -void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const +void MergeTreeDataPartWide::doCheckConsistency(bool require_part_metadata) const { - checkConsistencyBase(); std::string marks_file_extension = index_granularity_info.mark_type.getFileExtension(); if (!checksums.empty()) diff --git 
a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 14147c4ad56..508ea16d2d4 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -62,9 +62,9 @@ protected: MergeTreeIndexGranularity & index_granularity_, MergeTreeIndexGranularityInfo & index_granularity_info_, const IDataPartStorage & data_part_storage_, const std::string & any_column_file_name); -private: - void checkConsistency(bool require_part_metadata) const override; + void doCheckConsistency(bool require_part_metadata) const override; +private: /// Loads marks index granularity into memory void loadIndexGranularity() override; diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 5fdd0555777..2a45ab1d927 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -121,5 +121,8 @@ void MergeTreeIndexGranularity::resizeWithFixedGranularity(size_t size, size_t f } } - +std::string MergeTreeIndexGranularity::describe() const +{ + return fmt::format("initialized: {}, marks_rows_partial_sums: [{}]", initialized, fmt::join(marks_rows_partial_sums, ", ")); +} } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.h b/src/Storages/MergeTree/MergeTreeIndexGranularity.h index f5677995ae0..d67762f7293 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.h @@ -95,6 +95,8 @@ public: /// Add `size` of marks with `fixed_granularity` rows void resizeWithFixedGranularity(size_t size, size_t fixed_granularity); + + std::string describe() const; }; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index da89d52a9ff..1ff72a4e36d 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -88,6 +88,10 @@ std::string MarkType::getFileExtension() const } } +std::string MarkType::describe() const +{ + return fmt::format("adaptive: {}, compressed: {}, part_type: {}", adaptive, compressed, part_type); +} std::optional MergeTreeIndexGranularityInfo::getMarksTypeFromFilesystem(const IDataPartStorage & data_part_storage) { @@ -132,10 +136,18 @@ size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) con throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown part type"); } +std::string MergeTreeIndexGranularityInfo::describe() const +{ + return fmt::format( + "mark_type: [{}], index_granularity_bytes: {}, fixed_index_granularity: {}", + mark_type.describe(), + index_granularity_bytes, + fixed_index_granularity); +} + size_t getAdaptiveMrkSizeCompact(size_t columns_num) { /// Each mark contains number of rows in granule and two offsets for every column. 
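    /// For example, a compact part with 3 columns has marks of sizeof(UInt64) * (3 * 2 + 1) = 56 bytes each.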
return sizeof(UInt64) * (columns_num * 2 + 1); } - } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index af008866919..85006c3ffde 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -24,6 +24,8 @@ struct MarkType static bool isMarkFileExtension(std::string_view extension); std::string getFileExtension() const; + std::string describe() const; + bool adaptive = false; bool compressed = false; MergeTreeDataPartType::Value part_type = MergeTreeDataPartType::Unknown; @@ -58,6 +60,8 @@ public: size_t getMarkSizeInBytes(size_t columns_num = 1) const; static std::optional getMarksTypeFromFilesystem(const IDataPartStorage & data_part_storage); + + std::string describe() const; }; constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } From 2b52583e06056e19df97216f41b81102bca8bd9d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 6 Mar 2024 23:01:42 +0100 Subject: [PATCH 146/985] fix style --- src/Storages/MergeTree/MergeTreeDataPartCompact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index f897bcb0bfd..8bbec2808d7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -69,7 +69,7 @@ protected: void doCheckConsistency(bool require_part_metadata) const override; - private: +private: /// Loads marks index granularity into memory void loadIndexGranularity() override; From fb17749b50ce1024ef8c0b6f7bb8b7a58321894c Mon Sep 17 00:00:00 2001 From: Nikolay Monkov Date: Thu, 7 Mar 2024 09:45:24 +0500 Subject: [PATCH 147/985] file has been reformatted to pass Style check --- tests/ci/docker_server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 803dbfcd92a..35c86d8eadd 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -216,7 +216,9 @@ def gen_tags(version: ClickHouseVersion, release_type: str) -> List[str]: return tags -def buildx_args(urls: Dict[str, str], arch: str, direct_urls: List[str], version: str) -> List[str]: +def buildx_args( + urls: Dict[str, str], arch: str, direct_urls: List[str], version: str +) -> List[str]: args = [ f"--platform=linux/{arch}", f"--label=build-url={GITHUB_RUN_URL}", @@ -268,7 +270,9 @@ def build_and_push_image( urls = [url for url in direct_urls[arch] if ".deb" in url] else: urls = [url for url in direct_urls[arch] if ".tgz" in url] - cmd_args.extend(buildx_args(repo_urls, arch, direct_urls=urls, version=version.describe)) + cmd_args.extend( + buildx_args(repo_urls, arch, direct_urls=urls, version=version.describe) + ) if not push: cmd_args.append(f"--tag={image.repo}:{arch_tag}") cmd_args.extend( From f77b5963748c321975d8bd131e794dcc57002fc8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Mar 2024 16:17:27 +0800 Subject: [PATCH 148/985] Fix test --- .../integration/test_filesystem_cache/test.py | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 0cb1866f8e4..63316aba57e 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -350,6 +350,20 @@ def test_custom_cached_disk(cluster): def 
test_force_filesystem_cache_on_merges(cluster): def test(node, forced_read_through_cache_on_merge): + def to_int(value): + if value == "": + return 0 + else: + return int(value) + + r_cache_count = to_int(node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + )) + + w_cache_count = to_int(node.query( + "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + )) + node.query( """ DROP TABLE IF EXISTS test SYNC; @@ -376,36 +390,33 @@ def test_force_filesystem_cache_on_merges(cluster): assert int(node.query("SELECT count() FROM system.filesystem_cache")) > 0 assert int(node.query("SELECT max(size) FROM system.filesystem_cache")) == 1024 - write_count = int( + w_cache_count_2 = int( node.query( "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" ) ) - assert write_count > 100000 - assert "" == node.query( - "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + assert w_cache_count_2 > w_cache_count + + r_cache_count_2 = to_int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + ) ) + assert r_cache_count_2 == r_cache_count node.query("SYSTEM DROP FILESYSTEM CACHE") node.query("OPTIMIZE TABLE test FINAL") - new_write_count = int( + r_cache_count_3 = to_int( node.query( - "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" - ) - ) - assert new_write_count >= write_count - - if forced_read_through_cache_on_merge: - assert 100000 < int( - node.query( - "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" - ) - ) - else: - assert "" == node.query( "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" ) + ) + + if forced_read_through_cache_on_merge: + assert r_cache_count_3 > r_cache_count + else: + assert r_cache_count_3 == r_cache_count node = cluster.instances["node_force_read_through_cache_on_merge"] test(node, True) From 50b84954e4810c94c1397504a64ca96e1a0fed55 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Mar 2024 16:29:38 +0800 Subject: [PATCH 149/985] Update .reference --- .../0_stateless/02117_show_create_table_system.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 7382b24afbc..5081527ceef 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -195,6 +195,8 @@ CREATE TABLE system.disks `unreserved_space` UInt64, `keep_free_space` UInt64, `type` String, + `object_storage_type` String, + `metadata_type` String, `is_encrypted` UInt8, `is_read_only` UInt8, `is_write_once` UInt8, From c7f5b1631c359c61b6e4c74727092df73e956922 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 7 Mar 2024 08:30:34 +0000 Subject: [PATCH 150/985] Automatic style fix --- tests/integration/test_filesystem_cache/test.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 63316aba57e..c44d817c57c 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -356,13 +356,17 @@ def test_force_filesystem_cache_on_merges(cluster): else: return int(value) - r_cache_count = to_int(node.query( - "SELECT value FROM 
system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" - )) + r_cache_count = to_int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedReadBufferCacheWriteBytes'" + ) + ) - w_cache_count = to_int(node.query( - "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" - )) + w_cache_count = to_int( + node.query( + "SELECT value FROM system.events WHERE name = 'CachedWriteBufferCacheWriteBytes'" + ) + ) node.query( """ From 31ed1966e3c5388e601edd6e97c0497153bb7196 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Mar 2024 16:44:10 +0800 Subject: [PATCH 151/985] Fix build --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 9d7e714445a..46136ad7b12 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -166,7 +166,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name); return object_storage; }); @@ -202,7 +202,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name); return object_storage; }); From 10b5ce8ab3d1b412f6500d03bc96e205965178d7 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 7 Mar 2024 10:26:50 +0100 Subject: [PATCH 152/985] Updated BufferAllocationPolicy --- src/Backups/BackupIO_AzureBlobStorage.cpp | 4 +-- .../BufferAllocationPolicy.cpp} | 25 +++++-------- src/Common/BufferAllocationPolicy.h | 35 +++++++++++++++++++ src/Common/ThreadPoolTaskTracker.h | 2 -- src/Core/Settings.h | 5 +++ .../IO/WriteBufferFromAzureBlobStorage.cpp | 23 +++++++----- .../IO/WriteBufferFromAzureBlobStorage.h | 9 ++--- .../AzureBlobStorage/AzureBlobStorageAuth.cpp | 29 ++++++++------- .../AzureBlobStorage/AzureObjectStorage.cpp | 4 +-- .../AzureBlobStorage/AzureObjectStorage.h | 17 +++++++-- src/IO/WriteBufferFromS3.cpp | 7 +++- src/IO/WriteBufferFromS3.h | 13 +------ 12 files changed, 107 insertions(+), 66 deletions(-) rename src/{IO/WriteBufferFromS3BufferAllocationPolicy.cpp => Common/BufferAllocationPolicy.cpp} (74%) create mode 100644 src/Common/BufferAllocationPolicy.h diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index fb36248433d..8d2b217ad21 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -275,11 +275,9 @@ std::unique_ptr BackupWriterAzureBlobStorage::writeFile(const Strin return std::make_unique( client, key, - settings->max_single_part_upload_size, - settings->max_unexpected_write_error_retries, DBMS_DEFAULT_BUFFER_SIZE, write_settings, - settings->max_inflight_parts_for_one_file); + settings); } void BackupWriterAzureBlobStorage::removeFile(const String & file_name) diff --git a/src/IO/WriteBufferFromS3BufferAllocationPolicy.cpp b/src/Common/BufferAllocationPolicy.cpp similarity index 74% rename from src/IO/WriteBufferFromS3BufferAllocationPolicy.cpp rename to 
src/Common/BufferAllocationPolicy.cpp index 6347c1acfd7..1456233eb03 100644 --- a/src/IO/WriteBufferFromS3BufferAllocationPolicy.cpp +++ b/src/Common/BufferAllocationPolicy.cpp @@ -1,21 +1,17 @@ -#include "config.h" - -#if USE_AWS_S3 - -#include +#include "BufferAllocationPolicy.h" #include -namespace +namespace DB { -class FixedSizeBufferAllocationPolicy : public DB::WriteBufferFromS3::IBufferAllocationPolicy +class FixedSizeBufferAllocationPolicy : public IBufferAllocationPolicy { const size_t buffer_size = 0; size_t buffer_number = 0; public: - explicit FixedSizeBufferAllocationPolicy(const DB::S3Settings::RequestSettings::PartUploadSettings & settings_) + explicit FixedSizeBufferAllocationPolicy(const BufferAllocationSettings & settings_) : buffer_size(settings_.strict_upload_part_size) { chassert(buffer_size > 0); @@ -36,7 +32,7 @@ public: }; -class ExpBufferAllocationPolicy : public DB::WriteBufferFromS3::IBufferAllocationPolicy +class ExpBufferAllocationPolicy : public DB::IBufferAllocationPolicy { const size_t first_size = 0; const size_t second_size = 0; @@ -49,7 +45,7 @@ class ExpBufferAllocationPolicy : public DB::WriteBufferFromS3::IBufferAllocatio size_t buffer_number = 0; public: - explicit ExpBufferAllocationPolicy(const DB::S3Settings::RequestSettings::PartUploadSettings & settings_) + explicit ExpBufferAllocationPolicy(const BufferAllocationSettings & settings_) : first_size(std::max(settings_.max_single_part_upload_size, settings_.min_upload_part_size)) , second_size(settings_.min_upload_part_size) , multiply_factor(settings_.upload_part_size_multiply_factor) @@ -92,14 +88,10 @@ public: } }; -} -namespace DB -{ +IBufferAllocationPolicy::~IBufferAllocationPolicy() = default; -WriteBufferFromS3::IBufferAllocationPolicy::~IBufferAllocationPolicy() = default; - -WriteBufferFromS3::IBufferAllocationPolicyPtr WriteBufferFromS3::ChooseBufferPolicy(const S3Settings::RequestSettings::PartUploadSettings & settings_) +IBufferAllocationPolicyPtr ChooseBufferPolicy(BufferAllocationSettings settings_) { if (settings_.strict_upload_part_size > 0) return std::make_unique(settings_); @@ -109,4 +101,3 @@ WriteBufferFromS3::IBufferAllocationPolicyPtr WriteBufferFromS3::ChooseBufferPol } -#endif diff --git a/src/Common/BufferAllocationPolicy.h b/src/Common/BufferAllocationPolicy.h new file mode 100644 index 00000000000..b759d22ede6 --- /dev/null +++ b/src/Common/BufferAllocationPolicy.h @@ -0,0 +1,35 @@ +#pragma once + +#include "config.h" + +#include "logger_useful.h" + +#include + +namespace DB +{ + +struct BufferAllocationSettings +{ + size_t strict_upload_part_size = 0; + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t max_single_part_upload_size = 32 * 1024 * 1024; +}; + +class IBufferAllocationPolicy +{ + public: + virtual size_t getBufferNumber() const = 0; + virtual size_t getBufferSize() const = 0; + virtual void nextBuffer() = 0; + virtual ~IBufferAllocationPolicy() = 0; +}; + +using IBufferAllocationPolicyPtr = std::unique_ptr; + +IBufferAllocationPolicyPtr ChooseBufferPolicy(BufferAllocationSettings settings_); + +} diff --git a/src/Common/ThreadPoolTaskTracker.h b/src/Common/ThreadPoolTaskTracker.h index d37b759a913..72591648d30 100644 --- a/src/Common/ThreadPoolTaskTracker.h +++ b/src/Common/ThreadPoolTaskTracker.h @@ -11,8 +11,6 @@ namespace DB { -/// That class is used only in WriteBufferFromS3 for 
now. -/// Therefore it declared as a part of WriteBufferFromS3. /// TaskTracker takes a Callback which is run by scheduler in some external shared ThreadPool. /// TaskTracker brings the methods waitIfAny, waitAll/safeWaitAll /// to help with coordination of the running tasks. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b186ca6fe01..8ad08b7e348 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -78,10 +78,15 @@ class IColumn; M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \ M(UInt64, connections_with_failover_max_tries, 3, "The maximum number of attempts to connect to replicas.", 0) \ M(UInt64, s3_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to S3 (some implementations does not supports variable size parts).", 0) \ + M(UInt64, azure_strict_upload_part_size, 0, "The exact size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \ M(UInt64, s3_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to S3.", 0) \ + M(UInt64, azure_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to Azure blob storage.", 0) \ + M(UInt64, azure_max_upload_part_size, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage.", 0) \ M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3.", 0) \ M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \ + M(UInt64, azure_upload_part_size_multiply_factor, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage.", 0) \ + M(UInt64, azure_upload_part_size_multiply_parts_count_threshold, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor.", 0) \ M(UInt64, s3_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 0 means unlimited.", 0) \ M(UInt64, azure_max_inflight_parts_for_one_file, 20, "The maximum number of a concurrent loaded parts in multipart upload request. 
0 means unlimited.", 0) \ M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \ diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 74a8949b235..bc11d445a51 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -27,23 +27,27 @@ struct WriteBufferFromAzureBlobStorage::PartData WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, - size_t max_single_part_upload_size_, - size_t max_unexpected_write_error_retries_, size_t buf_size_, const WriteSettings & write_settings_, - size_t max_inflight_parts_for_one_file_, + std::shared_ptr settings_, ThreadPoolCallbackRunner schedule_) : WriteBufferFromFileBase(buf_size_, nullptr, 0) , log(getLogger("WriteBufferFromAzureBlobStorage")) - , max_single_part_upload_size(max_single_part_upload_size_) - , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) + , buffer_allocation_policy(ChooseBufferPolicy({settings_->strict_upload_part_size, + settings_->min_upload_part_size, + settings_->max_upload_part_size, + settings_->upload_part_size_multiply_factor, + settings_->upload_part_size_multiply_parts_count_threshold, + settings_->max_single_part_upload_size})) + , max_single_part_upload_size(settings_->max_single_part_upload_size) + , max_unexpected_write_error_retries(settings_->max_unexpected_write_error_retries) , blob_path(blob_path_) , write_settings(write_settings_) , blob_container_client(blob_container_client_) , task_tracker( std::make_unique( std::move(schedule_), - max_inflight_parts_for_one_file_, + settings_->max_inflight_parts_for_one_file, limitedLog)) { allocateBuffer(); @@ -119,7 +123,8 @@ void WriteBufferFromAzureBlobStorage::nextImpl() void WriteBufferFromAzureBlobStorage::allocateBuffer() { - memory = Memory(max_single_part_upload_size); + buffer_allocation_policy->nextBuffer(); + memory = Memory(buffer_allocation_policy->getBufferSize()); WriteBuffer::set(memory.data(), memory.size()); } @@ -129,10 +134,10 @@ void WriteBufferFromAzureBlobStorage::reallocateBuffer() if (available() > 0) return; - if (memory.size() == max_single_part_upload_size) + if (memory.size() == buffer_allocation_policy->getBufferSize()) return; - memory.resize(max_single_part_upload_size); + memory.resize(buffer_allocation_policy->getBufferSize()); WriteBuffer::set(memory.data(), memory.size()); diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 4897ca9a846..7223f66693e 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -12,7 +12,8 @@ #include #include #include - +#include +#include namespace Poco { @@ -32,11 +33,9 @@ public: WriteBufferFromAzureBlobStorage( AzureClientPtr blob_container_client_, const String & blob_path_, - size_t max_single_part_upload_size_, - size_t max_unexpected_write_error_retries_, size_t buf_size_, const WriteSettings & write_settings_, - size_t max_inflight_parts_for_one_file_, + std::shared_ptr settings_, ThreadPoolCallbackRunner schedule_ = {}); ~WriteBufferFromAzureBlobStorage() override; @@ -63,6 +62,8 @@ private: LoggerPtr log; LogSeriesLimiterPtr limitedLog = std::make_shared(log, 1, 5); + IBufferAllocationPolicyPtr buffer_allocation_policy; + const size_t max_single_part_upload_size; const size_t 
max_unexpected_write_error_retries; const std::string blob_path; diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp index f99586b2d1a..1d01e2f45e3 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureBlobStorageAuth.cpp @@ -160,18 +160,23 @@ std::unique_ptr getAzureBlobContainerClient( std::unique_ptr getAzureBlobStorageSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context) { - return std::make_unique( - config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024), - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".max_single_read_retries", 3), - config.getInt(config_prefix + ".max_single_download_retries", 3), - config.getInt(config_prefix + ".list_object_keys_size", 1000), - config.getUInt64(config_prefix + ".max_upload_part_size", 5ULL * 1024 * 1024 * 1024), - config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size), - config.getBool(config_prefix + ".use_native_copy", false), - config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries), - config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file) - ); + std::unique_ptr settings = std::make_unique(); + settings->max_single_part_upload_size = config.getUInt64(config_prefix + ".max_single_part_upload_size", 100 * 1024 * 1024); + settings->min_bytes_for_seek = config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024); + settings->max_single_read_retries = config.getInt(config_prefix + ".max_single_read_retries", 3); + settings->max_single_download_retries = config.getInt(config_prefix + ".max_single_download_retries", 3); + settings->list_object_keys_size = config.getInt(config_prefix + ".list_object_keys_size", 1000); + settings->min_upload_part_size = config.getUInt64(config_prefix + ".min_upload_part_size", context->getSettings().azure_min_upload_part_size); + settings->max_upload_part_size = config.getUInt64(config_prefix + ".max_upload_part_size", context->getSettings().azure_max_upload_part_size); + settings->max_single_part_copy_size = config.getUInt64(config_prefix + ".max_single_part_copy_size", context->getSettings().azure_max_single_part_copy_size); + settings->use_native_copy = config.getBool(config_prefix + ".use_native_copy", false); + settings->max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".max_unexpected_write_error_retries", context->getSettings().azure_max_unexpected_write_error_retries); + settings->max_inflight_parts_for_one_file = config.getUInt64(config_prefix + ".max_inflight_parts_for_one_file", context->getSettings().azure_max_inflight_parts_for_one_file); + settings->strict_upload_part_size = config.getUInt64(config_prefix + ".strict_upload_part_size", context->getSettings().azure_strict_upload_part_size); + settings->upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".upload_part_size_multiply_factor", context->getSettings().azure_upload_part_size_multiply_factor); + settings->upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".upload_part_size_multiply_parts_count_threshold", 
context->getSettings().azure_upload_part_size_multiply_parts_count_threshold); + + return settings; } } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 844789ea5b5..15ab55d5611 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -265,11 +265,9 @@ std::unique_ptr AzureObjectStorage::writeObject( /// NO return std::make_unique( client.get(), object.remote_path, - settings.get()->max_single_part_upload_size, - settings.get()->max_unexpected_write_error_retries, buf_size, patchSettings(write_settings), - settings.get()->max_inflight_parts_for_one_file); + settings.get()); } /// Remove file. Throws exception if file doesn't exists or it's a directory. diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 1b473a01304..b97d706a4d9 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -24,21 +24,29 @@ struct AzureObjectStorageSettings int max_single_read_retries_, int max_single_download_retries_, int list_object_keys_size_, + size_t min_upload_part_size_, size_t max_upload_part_size_, size_t max_single_part_copy_size_, bool use_native_copy_, size_t max_unexpected_write_error_retries_, - size_t max_inflight_parts_for_one_file_) + size_t max_inflight_parts_for_one_file_, + size_t strict_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_parts_count_threshold_) : max_single_part_upload_size(max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) , list_object_keys_size(list_object_keys_size_) + , min_upload_part_size(min_upload_part_size_) , max_upload_part_size(max_upload_part_size_) , max_single_part_copy_size(max_single_part_copy_size_) , use_native_copy(use_native_copy_) - , max_unexpected_write_error_retries (max_unexpected_write_error_retries_) - , max_inflight_parts_for_one_file (max_inflight_parts_for_one_file_) + , max_unexpected_write_error_retries(max_unexpected_write_error_retries_) + , max_inflight_parts_for_one_file(max_inflight_parts_for_one_file_) + , strict_upload_part_size(strict_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) { } @@ -55,6 +63,9 @@ struct AzureObjectStorageSettings bool use_native_copy = false; size_t max_unexpected_write_error_retries = 4; size_t max_inflight_parts_for_one_file = 20; + size_t strict_upload_part_size = 0; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; }; using AzureClient = Azure::Storage::Blobs::BlobContainerClient; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 510d9bef4d3..60fa828d6c4 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -93,7 +93,12 @@ WriteBufferFromS3::WriteBufferFromS3( , write_settings(write_settings_) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , buffer_allocation_policy(ChooseBufferPolicy(upload_settings)) + , 
buffer_allocation_policy(ChooseBufferPolicy({upload_settings.strict_upload_part_size, + upload_settings.min_upload_part_size, + upload_settings.max_upload_part_size, + upload_settings.upload_part_size_multiply_factor, + upload_settings.upload_part_size_multiply_parts_count_threshold, + upload_settings.max_single_part_upload_size})) , task_tracker( std::make_unique( std::move(schedule_), diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index afd8b9909c1..840274c8ace 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -49,18 +50,6 @@ public: std::string getFileName() const override { return key; } void sync() override { next(); } - class IBufferAllocationPolicy - { - public: - virtual size_t getBufferNumber() const = 0; - virtual size_t getBufferSize() const = 0; - virtual void nextBuffer() = 0; - virtual ~IBufferAllocationPolicy() = 0; - }; - using IBufferAllocationPolicyPtr = std::unique_ptr; - - static IBufferAllocationPolicyPtr ChooseBufferPolicy(const S3Settings::RequestSettings::PartUploadSettings & settings_); - private: /// Receives response from the server after sending all data. void finalizeImpl() override; From f2a3ffe9eb79046093e77ed39f2366754e7a8ba2 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 7 Mar 2024 17:14:12 +0800 Subject: [PATCH 153/985] Replace some headers with forward decl --- src/Backups/BackupCoordinationRemote.cpp | 1 + .../NamedCollections/NamedCollectionUtils.cpp | 1 + .../ObjectStorages/ObjectStorageFactory.cpp | 3 +++ src/Formats/ReadSchemaUtils.cpp | 1 + src/Interpreters/DatabaseCatalog.cpp | 1 - src/Interpreters/DatabaseCatalog.h | 10 +++---- src/Processors/QueryPlan/AggregatingStep.cpp | 1 + src/Processors/QueryPlan/CubeStep.cpp | 1 + src/Storages/StorageAzureBlob.cpp | 4 +++ src/Storages/StorageS3.h | 27 ++++++++++--------- 10 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 9c509858b2a..b869f890f56 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp b/src/Common/NamedCollections/NamedCollectionUtils.cpp index fe0f42467c7..e3ff50f5e3f 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ b/src/Common/NamedCollections/NamedCollectionUtils.cpp @@ -17,6 +17,7 @@ #include #include +#include namespace fs = std::filesystem; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 4f198be64fe..5fae257e8d4 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -21,6 +21,9 @@ #include #include +#include + +namespace fs = std::filesystem; namespace DB { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 5badf4301bf..736a35927c3 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a9fd5c852ba..a5a523b658b 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -26,7 +26,6 @@ #include #include -#include "Interpreters/Context_fwd.h" #include "config.h" #if USE_MYSQL 
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 4fe114cc493..6995fc51941 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -1,15 +1,14 @@ #pragma once #include +#include +#include #include #include -#include #include #include -#include "Common/NamePrompter.h" +#include #include -#include "Storages/IStorage.h" -#include "Databases/IDatabase.h" #include #include @@ -23,9 +22,6 @@ #include #include #include -#include - -namespace fs = std::filesystem; namespace DB { diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index f374a7b7b10..a76bacdd97b 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 0c632c346c7..bf2ce148529 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 1f0fba99f84..2d4f1db04a1 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -41,6 +41,10 @@ #include #include +#include + +namespace fs = std::filesystem; + using namespace Azure::Storage::Blobs; namespace CurrentMetrics diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 65fb3b51be2..bf81ead0599 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -4,27 +4,28 @@ #if USE_AWS_S3 -#include - #include - -#include -#include - -#include -#include -#include -#include -#include +#include #include +#include +#include #include #include -#include +#include +#include +#include #include +#include #include #include +#include #include -#include +#include +#include + +#include + +namespace fs = std::filesystem; namespace Aws::S3 { From f0a8d8843de5dffae2e1d4476fb119ad34059340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 7 Mar 2024 00:10:06 +0100 Subject: [PATCH 154/985] Not x86_64 are lagging behind in features --- base/base/itoa.h | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/base/base/itoa.h b/base/base/itoa.h index c450090d82f..a36eecaf1e5 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -351,12 +351,20 @@ namespace convert template static inline char * writeUIntText(T _x, char * p) { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wbit-int-extension" int len = digits10(_x); static_assert(std::is_same_v || std::is_same_v); - using T_ = std::conditional_t, unsigned __int128, unsigned _BitInt(256)>; + using T_ = std::conditional_t< + std::is_same_v, + unsigned __int128, +#if defined(__x86_64__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wbit-int-extension" + unsigned _BitInt(256) #pragma clang diagnostic pop +#else + T +#endif + >; T_ x; T_ hundred(100ULL); @@ -366,8 +374,12 @@ static inline char * writeUIntText(T _x, char * p) } else { +#if defined(__x86_64__) x = (T_(_x.items[T::_impl::little(3)]) << 192) + (T_(_x.items[T::_impl::little(2)]) << 128) + (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); +#else + x = _x; +#endif } auto * pp = p + len; From 66dea5111298abd4301df55b5615d158105fe78f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi 
<114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:40:48 +0100 Subject: [PATCH 155/985] fix clang-tidy --- src/Functions/array/arrayDotProduct.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index f9a6687e028..8b7c85e05dd 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -322,18 +322,18 @@ private: const auto & offsets_y = array_y.getOffsets(); ColumnArray::Offset prev_offset = 0; - for (size_t row = 0; row < offsets_y.size(); ++row) + for (auto offset_y : offsets_y) { - if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]] + if (offsets_x[0] != offset_y - prev_offset) [[unlikely]] { throw Exception( ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arguments of function {} have different array sizes: {} and {}", getName(), offsets_x[0], - offsets_y[row] - prev_offset); + offset_y - prev_offset); } - prev_offset = offsets_y[row]; + prev_offset = offset_y; } auto col_res = ColumnVector::create(); From 6c69e7d4dcfdfa21cfcaa103fc1cc7c53dfe0291 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:29:04 +0800 Subject: [PATCH 156/985] detect output format by file extension in clickhouse-local --- programs/local/LocalServer.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 68f0e52ce08..20974dd9751 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile() return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); } + +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + std::string LocalServer::getInitialCreateTableQuery() { if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty())) @@ -638,7 +646,14 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")); + if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile()) + { + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TSV"; + } + else + format = config().getString("output-format", config().getString("format", is_interactive ? 
"PrettyCompact" : "TSV")); insert_format = "Values"; /// Setting value from cmd arg overrides one from config From 6d5fd2857ed50047d8acf48766165aa815ca30b9 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:29:42 +0800 Subject: [PATCH 157/985] detect output format by file extension in clickhouse-client --- programs/client/Client.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a2bd6b6016a..fac34003553 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,6 +50,7 @@ #include #include #include +#include namespace fs = std::filesystem; using namespace std::literals; @@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description, } +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1173,7 +1181,14 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (config().has("vertical")) + if (is_default_format && checkIfStdoutIsRegularFile()) + { + is_default_format = false; + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TabSeparated"; + } + else if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); From 112c1efb7da2619cb67a48ff7fbe65ecea8e44a9 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:30:24 +0800 Subject: [PATCH 158/985] test detect output format by file extension --- ..._output_format_by_file_extension.reference | 20 +++++++++++++++++++ ..._detect_output_format_by_file_extension.sh | 13 ++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference create mode 100755 tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference new file mode 100644 index 00000000000..7b36cc96f5e --- /dev/null +++ b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh new file mode 100755 index 00000000000..ec1edd710a1 --- /dev/null +++ b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet + +$CLICKHOUSE_CLIENT -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet From 930deee699be05398aac334ce9e025d084c68a30 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 7 Mar 2024 22:02:10 +0800 Subject: [PATCH 159/985] fix bugs --- src/Columns/ColumnArray.cpp | 63 ++++++++++++++++++------------------- src/Columns/ColumnArray.h | 2 +- 2 files changed, 32 insertions(+), 33 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 44b17c89ae1..0214375122f 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -43,29 +43,34 @@ namespace ErrorCodes static constexpr size_t max_array_size_as_field = 1000000; -ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column) +ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets) : data(std::move(nested_column)), offsets(std::move(offsets_column)) { - const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); - - if (!offsets_concrete) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); - - if (!offsets_concrete->empty() && data && !data->empty()) + if (check_offsets) { - Offset last_offset = offsets_concrete->getData().back(); + const ColumnOffsets * offsets_concrete = typeid_cast(offsets.get()); - /// This will also prevent possible overflow in offset. - if (data->size() != last_offset) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", - data->size(), last_offset); + if (!offsets_concrete) + throw Exception(ErrorCodes::LOGICAL_ERROR, "offsets_column must be a ColumnUInt64"); + + if (!offsets_concrete->empty() && data && !data->empty()) + { + Offset last_offset = offsets_concrete->getData().back(); + + /// This will also prevent possible overflow in offset. + if (data->size() != last_offset) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "offsets_column has data inconsistent with nested_column. Data size: {}, last offset: {}", + data->size(), + last_offset); + } + + /** NOTE + * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). + * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. + */ } - - /** NOTE - * Arrays with constant value are possible and used in implementation of higher order functions (see FunctionReplicate). - * But in most cases, arrays with constant value are unexpected and code will work wrong. Use with caution. 
- */ } ColumnArray::ColumnArray(MutableColumnPtr && nested_column) @@ -425,20 +430,14 @@ void ColumnArray::insertManyFromTuple(const ColumnArray & src, size_t position, if (tuple_size != src_tuple_size) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Nested tuple size mismatch: {} vs {}", tuple_size, src_tuple_size); - MutableColumns temporary_arrays(tuple_size); - Columns src_temporary_arrays(tuple_size); - for (size_t i = 0; i < tuple_size; ++i) - { - temporary_arrays[i] = ColumnArray::create(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable()); - src_temporary_arrays[i] = ColumnArray::create(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); - assert_cast(*temporary_arrays[i]) - .insertManyFromImpl(assert_cast(*src_temporary_arrays[i]), position, length, false); - } - Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) - tuple_columns[i] = assert_cast(*temporary_arrays[i]).getDataPtr(); - + { + auto array_of_element = ColumnArray(tuple.getColumn(i).assumeMutable(), getOffsetsPtr()->assumeMutable(), false); + auto src_array_of_element = ColumnArray(src_tuple.getColumn(i).assumeMutable(), src.getOffsetsPtr()->assumeMutable()); + array_of_element.insertManyFromImpl(src_array_of_element, position, length, false); + tuple_columns[i] = array_of_element.getDataPtr(); + } getDataPtr() = ColumnTuple::create(std::move(tuple_columns)); } @@ -448,12 +447,12 @@ void ColumnArray::insertManyFromNullable(const ColumnArray & src, size_t positio const ColumnNullable & src_nullable = assert_cast(src.getData()); /// Process nested column without updating array offsets - auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto array_of_nested = ColumnArray(nullable.getNestedColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); auto src_array_of_nested = ColumnArray(src_nullable.getNestedColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); array_of_nested.insertManyFromImpl(src_array_of_nested, position, length, false); /// Process null map column without updating array offsets - auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable()); + auto array_of_null_map = ColumnArray(nullable.getNullMapColumnPtr()->assumeMutable(), getOffsetsPtr()->assumeMutable(), false); auto src_array_of_null_map = ColumnArray(src_nullable.getNullMapColumnPtr()->assumeMutable(), src.getOffsetsPtr()->assumeMutable()); array_of_null_map.insertManyFromImpl(src_array_of_null_map, position, length, false); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 765f86ec552..8c4d103e7d0 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -21,7 +21,7 @@ private: friend class COWHelper, ColumnArray>; /** Create an array column with specified values and offsets. 
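      * If check_offsets is false, the check that the nested column size matches the last offset is skipped;
      * insertManyFromNullable() and insertManyFromTuple() pass false for the temporary wrapper columns they build,
      * since the nested data and the shared offsets are updated separately there.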
*/ - ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column); + ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column, bool check_offsets = true); /** Create an empty column of arrays with the type of values as in the column `nested_column` */ explicit ColumnArray(MutableColumnPtr && nested_column); From cd9d9018e0db8139e48cb722e9e9685d2a212c8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 7 Mar 2024 17:15:42 +0100 Subject: [PATCH 160/985] Be able to iterate --- base/base/CMakeLists.txt | 1 + base/base/itoa.cpp | 503 +++++++++++++++++++++++++++++++++++ base/base/itoa.h | 498 +++------------------------------- src/Functions/CMakeLists.txt | 1 + 4 files changed, 540 insertions(+), 463 deletions(-) create mode 100644 base/base/itoa.cpp diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 548ba01d86a..55d046767b8 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -19,6 +19,7 @@ set (SRCS getPageSize.cpp getThreadId.cpp int8_to_string.cpp + itoa.cpp JSON.cpp mremap.cpp phdr_cache.cpp diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp new file mode 100644 index 00000000000..9fefc9f0f07 --- /dev/null +++ b/base/base/itoa.cpp @@ -0,0 +1,503 @@ +// Based on https://github.com/amdn/itoa and combined with our optimizations +// +//=== itoa.h - Fast integer to ascii conversion --*- C++ -*-// +// +// The MIT License (MIT) +// Copyright (c) 2016 Arturo Martin-de-Nicolas +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include + + +template +int digits10(T x) +{ + if (x < T(10ULL)) + return 1; + if (x < T(100ULL)) + return 2; + if constexpr (sizeof(T) == 1) + return 3; + else + { + if (x < T(1000ULL)) + return 3; + + if (x < T(1000000000000ULL)) + { + if (x < T(100000000ULL)) + { + if (x < T(1000000ULL)) + { + if (x < T(10000ULL)) + return 4; + else + return 5 + (x >= T(100000ULL)); + } + + return 7 + (x >= T(10000000ULL)); + } + + if (x < T(10000000000ULL)) + return 9 + (x >= T(1000000000ULL)); + + return 11 + (x >= T(100000000000ULL)); + } + + return 12 + digits10(x / T(1000000000000ULL)); + } +} + + +namespace +{ + +template +static constexpr T pow10(size_t x) +{ + return x ? 10 * pow10(x - 1) : 1; +} + +// Division by a power of 10 is implemented using a multiplicative inverse. 
+// This strength reduction is also done by optimizing compilers, but +// presently the fastest results are produced by using the values +// for the multiplication and the shift as given by the algorithm +// described by Agner Fog in "Optimizing Subroutines in Assembly Language" +// +// http://www.agner.org/optimize/optimizing_assembly.pdf +// +// "Integer division by a constant (all processors) +// A floating point number can be divided by a constant by multiplying +// with the reciprocal. If we want to do the same with integers, we have +// to scale the reciprocal by 2n and then shift the product to the right +// by n. There are various algorithms for finding a suitable value of n +// and compensating for rounding errors. The algorithm described below +// was invented by Terje Mathisen, Norway, and not published elsewhere." + +/// Division by constant is performed by: +/// 1. Adding 1 if needed; +/// 2. Multiplying by another constant; +/// 3. Shifting right by another constant. +template +struct Division +{ + static constexpr bool add{add_}; + static constexpr UInt multiplier{multiplier_}; + static constexpr unsigned shift{shift_}; +}; + +/// Select a type with appropriate number of bytes from the list of types. +/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. +/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. +template +struct SelectType +{ + using Result = typename SelectType::Result; +}; + +template +struct SelectType<1, T, Ts...> +{ + using Result = T; +}; + + +/// Division by 10^N where N is the size of the type. +template +using DivisionBy10PowN = typename SelectType< + N, + Division, /// divide by 10 + Division, /// divide by 100 + Division, /// divide by 10000 + Division /// divide by 100000000 + >::Result; + +template +using UnsignedOfSize = typename SelectType::Result; + +/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in +template +struct QuotientAndRemainder +{ + UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits + UnsignedOfSize remainder; // remainder with at most N decimal digits +}; + +template +QuotientAndRemainder static inline split(UnsignedOfSize value) +{ + constexpr DivisionBy10PowN division; + + UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; + UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); + + return {quotient, remainder}; +} + + +static inline char * outDigit(char * p, uint8_t value) +{ + *p = '0' + value; + ++p; + return p; +} + +// Using a lookup table to convert binary numbers from 0 to 99 +// into ascii characters as described by Andrei Alexandrescu in +// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ + +static const char digits[201] = "00010203040506070809" + "10111213141516171819" + "20212223242526272829" + "30313233343536373839" + "40414243444546474849" + "50515253545556575859" + "60616263646566676869" + "70717273747576777879" + "80818283848586878889" + "90919293949596979899"; + +static inline char * outTwoDigits(char * p, uint8_t value) +{ + memcpy(p, &digits[value * 2], 2); + p += 2; + return p; +} + + +namespace convert +{ +template +static char * head(char * p, UInt u); +template +static char * tail(char * p, UInt u); + +//===----------------------------------------------------------===// +// head: find most significant digit, skip leading zeros 
+//===----------------------------------------------------------===// + +// "x" contains quotient and remainder after division by 10^N +// quotient is less than 10^N +template +static inline char * head(char * p, QuotientAndRemainder x) +{ + p = head(p, UnsignedOfSize(x.quotient)); + p = tail(p, x.remainder); + return p; +} + +// "u" is less than 10^2*N +template +static inline char * head(char * p, UInt u) +{ + return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); +} + +// recursion base case, selected when "u" is one byte +template <> +inline char * head, 1>(char * p, UnsignedOfSize<1> u) +{ + return u < 10 ? outDigit(p, u) : outTwoDigits(p, u); +} + +//===----------------------------------------------------------===// +// tail: produce all digits including leading zeros +//===----------------------------------------------------------===// + +// recursive step, "u" is less than 10^2*N +template +static inline char * tail(char * p, UInt u) +{ + QuotientAndRemainder x = split(u); + p = tail(p, UnsignedOfSize(x.quotient)); + p = tail(p, x.remainder); + return p; +} + +// recursion base case, selected when "u" is one byte +template <> +inline char * tail, 1>(char * p, UnsignedOfSize<1> u) +{ + return outTwoDigits(p, u); +} + +//===----------------------------------------------------------===// +// large values are >= 10^2*N +// where x contains quotient and remainder after division by 10^N +//===----------------------------------------------------------===// + +template +static inline char * large(char * p, QuotientAndRemainder x) +{ + QuotientAndRemainder y = split(x.quotient); + p = head(p, UnsignedOfSize(y.quotient)); + p = tail(p, y.remainder); + p = tail(p, x.remainder); + return p; +} + +//===----------------------------------------------------------===// +// handle values of "u" that might be >= 10^2*N +// where N is the size of "u" in bytes +//===----------------------------------------------------------===// + +template +static inline char * uitoa(char * p, UInt u) +{ + if (u < pow10>(N)) + return head(p, UnsignedOfSize(u)); + QuotientAndRemainder x = split(u); + + return u < pow10>(2 * N) ? head(p, x) : large(p, x); +} + +// selected when "u" is one byte +template <> +inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) +{ + if (u < 10) + return outDigit(p, u); + else if (u < 100) + return outTwoDigits(p, u); + else + { + p = outDigit(p, u / 100); + p = outTwoDigits(p, u % 100); + return p; + } +} + +//===----------------------------------------------------------===// +// handle unsigned and signed integral operands +//===----------------------------------------------------------===// + +// itoa: handle unsigned integral operands (selected by SFINAE) +template && std::is_integral_v> * = nullptr> +static inline char * itoa(U u, char * p) +{ + return convert::uitoa(p, u); +} + +// itoa: handle signed integral operands (selected by SFINAE) +template && std::is_integral_v> * = nullptr> +static inline char * itoa(I i, char * p) +{ + // Need "mask" to be filled with a copy of the sign bit. + // If "i" is a negative value, then the result of "operator >>" + // is implementation-defined, though usually it is an arithmetic + // right shift that replicates the sign bit. + // Use a conditional expression to be portable, + // a good optimizing compiler generates an arithmetic right shift + // and avoids the conditional branch. + UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; + // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. 
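+    // (For example, with a 32-bit type: i = -5 gives mask = 0xFFFFFFFF, and the expression below computes
+    //  u = ((0xFFFFFFF6 & 0) - 0xFFFFFFFB) mod 2^32 = 5; i = 5 gives mask = 0 and u = (10 & 0xFFFFFFFF) - 5 = 5.)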
+ // Cannot use std::abs() because the result is undefined + // in 2's complement systems for the most-negative value. + // Want to avoid conditional branch for performance reasons since + // CPU branch prediction will be ineffective when negative values + // occur randomly. + // Let "u" be "i" cast to unsigned type UnsignedOfSize. + // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. + // This yields the absolute value with the desired type without + // using a conditional branch and without invoking undefined or + // implementation defined behavior: + UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); + // Unconditionally store a minus sign when producing digits + // in a forward direction and increment the pointer only if + // the value is in fact negative. + // This avoids a conditional branch and is safe because we will + // always produce at least one digit and it will overwrite the + // minus sign when the value is not negative. + *p = '-'; + p += (mask & 1); + p = convert::uitoa(p, u); + return p; +} +} + + +template +static NO_INLINE char * writeUIntText(T _x, char * p) +{ + static_assert(std::is_same_v || std::is_same_v); + using T_ = std::conditional_t< + std::is_same_v, + unsigned __int128, +#if defined(__x86_64__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wbit-int-extension" + unsigned _BitInt(256) +# pragma clang diagnostic pop +#else + T +#endif + >; + + T_ x; + T_ hundred(100ULL); + if constexpr (std::is_same_v) + { + x = (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); + } + else + { +#if defined(__x86_64__) + x = (T_(_x.items[T::_impl::little(3)]) << 192) + (T_(_x.items[T::_impl::little(2)]) << 128) + + (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); +#else + x = _x; +#endif + } + + int len = digits10(x); + auto * pp = p + len; + while (x >= hundred) + { + const auto i = x % hundred; + x /= hundred; + pp -= 2; + outTwoDigits(pp, i); + } + if (x < 10) + *p = '0' + x; + else + outTwoDigits(p, x); + return p + len; +} + +static ALWAYS_INLINE inline char * writeLeadingMinus(char * pos) +{ + *pos = '-'; + return pos + 1; +} + +template +static ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) +{ + static_assert(std::is_same_v || std::is_same_v); + + using UnsignedT = make_unsigned_t; + static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1); + + if (unlikely(x == min_int)) + { + if constexpr (std::is_same_v) + { + const char * res = "-170141183460469231731687303715884105728"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } + else if constexpr (std::is_same_v) + { + const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968"; + memcpy(pos, res, strlen(res)); + return pos + strlen(res); + } + } + + if (x < 0) + { + x = -x; + pos = writeLeadingMinus(pos); + } + return writeUIntText(UnsignedT(x), pos); +} +} + +template +char * itoa(T i, char * p) +{ + return convert::itoa(i, p); +} + +template <> +char * itoa(UInt8 i, char * p) +{ + return convert::itoa(uint8_t(i), p); +} + +template <> +char * itoa(Int8 i, char * p) +{ + return convert::itoa(int8_t(i), p); +} + +template <> +char * itoa(UInt128 i, char * p) +{ + return writeUIntText(i, p); +} + +template <> +char * itoa(Int128 i, char * p) +{ + return writeSIntText(i, p); +} + +template <> +char * itoa(UInt256 i, char * p) +{ + return writeUIntText(i, p); +} + +template <> +char * itoa(Int256 i, char * p) +{ + return 
writeSIntText(i, p); +} + +#define FOR_MISSING_INTEGER_TYPES(M) \ + M(int8_t) \ + M(uint8_t) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(Int16) \ + M(Int32) \ + M(Int64) + +#define INSTANTIATION(T) template char * itoa(T i, char * p); +FOR_MISSING_INTEGER_TYPES(INSTANTIATION) + +#undef FOR_MISSING_INTEGER_TYPES +#undef INSTANTIATION + + +#define DIGITS_INTEGER_TYPES(M) \ + M(uint8_t) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(UInt128) \ + M(UInt256) + +#define INSTANTIATION(T) template int digits10(T x); +DIGITS_INTEGER_TYPES(INSTANTIATION) + +#undef DIGITS_INTEGER_TYPES +#undef INSTANTIATION diff --git a/base/base/itoa.h b/base/base/itoa.h index a36eecaf1e5..71603cdeb88 100644 --- a/base/base/itoa.h +++ b/base/base/itoa.h @@ -1,474 +1,46 @@ #pragma once -// Based on https://github.com/amdn/itoa and combined with our optimizations -// -//=== itoa.h - Fast integer to ascii conversion --*- C++ -*-// -// -// The MIT License (MIT) -// Copyright (c) 2016 Arturo Martin-de-Nicolas -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include #include +template char * itoa(T i, char * p); -template -inline int digits10(T x) -{ - if (x < 10ULL) - return 1; - if (x < 100ULL) - return 2; - if (x < 1000ULL) - return 3; +template <> char * itoa(UInt8 i, char * p); +template <> char * itoa(Int8 i, char * p); +template <> char * itoa(UInt128 i, char * p); +template <> char * itoa(Int128 i, char * p); +template <> char * itoa(UInt256 i, char * p); +template <> char * itoa(Int256 i, char * p); - if (x < 1000000000000ULL) - { - if (x < 100000000ULL) - { - if (x < 1000000ULL) - { - if (x < 10000ULL) - return 4; - else - return 5 + (x >= 100000ULL); - } +#define FOR_MISSING_INTEGER_TYPES(M) \ + M(int8_t) \ + M(uint8_t) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(Int16) \ + M(Int32) \ + M(Int64) - return 7 + (x >= 10000000ULL); - } +#define INSTANTIATION(T) \ + extern template char * itoa(T i, char * p); +FOR_MISSING_INTEGER_TYPES(INSTANTIATION) - if (x < 10000000000ULL) - return 9 + (x >= 1000000000ULL); - - return 11 + (x >= 100000000000ULL); - } - - return 12 + digits10(x / 1000000000000ULL); -} +#undef FOR_MISSING_INTEGER_TYPES +#undef INSTANTIATION -namespace impl -{ +template int digits10(T x); -template -static constexpr T pow10(size_t x) -{ - return x ? 
10 * pow10(x - 1) : 1; -} - -// Division by a power of 10 is implemented using a multiplicative inverse. -// This strength reduction is also done by optimizing compilers, but -// presently the fastest results are produced by using the values -// for the multiplication and the shift as given by the algorithm -// described by Agner Fog in "Optimizing Subroutines in Assembly Language" -// -// http://www.agner.org/optimize/optimizing_assembly.pdf -// -// "Integer division by a constant (all processors) -// A floating point number can be divided by a constant by multiplying -// with the reciprocal. If we want to do the same with integers, we have -// to scale the reciprocal by 2n and then shift the product to the right -// by n. There are various algorithms for finding a suitable value of n -// and compensating for rounding errors. The algorithm described below -// was invented by Terje Mathisen, Norway, and not published elsewhere." - -/// Division by constant is performed by: -/// 1. Adding 1 if needed; -/// 2. Multiplying by another constant; -/// 3. Shifting right by another constant. -template -struct Division -{ - static constexpr bool add{add_}; - static constexpr UInt multiplier{multiplier_}; - static constexpr unsigned shift{shift_}; -}; - -/// Select a type with appropriate number of bytes from the list of types. -/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. -/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. -template -struct SelectType -{ - using Result = typename SelectType::Result; -}; - -template -struct SelectType<1, T, Ts...> -{ - using Result = T; -}; - - -/// Division by 10^N where N is the size of the type. -template -using DivisionBy10PowN = typename SelectType -< - N, - Division, /// divide by 10 - Division, /// divide by 100 - Division, /// divide by 10000 - Division /// divide by 100000000 ->::Result; - -template -using UnsignedOfSize = typename SelectType -< - N, - uint8_t, - uint16_t, - uint32_t, - uint64_t, - __uint128_t ->::Result; - -/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in -template -struct QuotientAndRemainder -{ - UnsignedOfSize quotient; // quotient with fewer than 2*N decimal digits - UnsignedOfSize remainder; // remainder with at most N decimal digits -}; - -template -QuotientAndRemainder static inline split(UnsignedOfSize value) -{ - constexpr DivisionBy10PowN division; - - UnsignedOfSize quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; - UnsignedOfSize remainder = static_cast>(value - quotient * pow10>(N)); - - return {quotient, remainder}; -} - - -static inline char * outDigit(char * p, uint8_t value) -{ - *p = '0' + value; - ++p; - return p; -} - -// Using a lookup table to convert binary numbers from 0 to 99 -// into ascii characters as described by Andrei Alexandrescu in -// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ - -static const char digits[201] = "00010203040506070809" - "10111213141516171819" - "20212223242526272829" - "30313233343536373839" - "40414243444546474849" - "50515253545556575859" - "60616263646566676869" - "70717273747576777879" - "80818283848586878889" - "90919293949596979899"; - -static inline char * outTwoDigits(char * p, uint8_t value) -{ - memcpy(p, &digits[value * 2], 2); - p += 2; - return p; -} - - -namespace convert -{ - template static char * head(char * p, UInt u); - 
template static char * tail(char * p, UInt u); - - //===----------------------------------------------------------===// - // head: find most significant digit, skip leading zeros - //===----------------------------------------------------------===// - - // "x" contains quotient and remainder after division by 10^N - // quotient is less than 10^N - template - static inline char * head(char * p, QuotientAndRemainder x) - { - p = head(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; - } - - // "u" is less than 10^2*N - template - static inline char * head(char * p, UInt u) - { - return u < pow10>(N) - ? head(p, UnsignedOfSize(u)) - : head(p, split(u)); - } - - // recursion base case, selected when "u" is one byte - template <> - inline char * head, 1>(char * p, UnsignedOfSize<1> u) - { - return u < 10 - ? outDigit(p, u) - : outTwoDigits(p, u); - } - - //===----------------------------------------------------------===// - // tail: produce all digits including leading zeros - //===----------------------------------------------------------===// - - // recursive step, "u" is less than 10^2*N - template - static inline char * tail(char * p, UInt u) - { - QuotientAndRemainder x = split(u); - p = tail(p, UnsignedOfSize(x.quotient)); - p = tail(p, x.remainder); - return p; - } - - // recursion base case, selected when "u" is one byte - template <> - inline char * tail, 1>(char * p, UnsignedOfSize<1> u) - { - return outTwoDigits(p, u); - } - - //===----------------------------------------------------------===// - // large values are >= 10^2*N - // where x contains quotient and remainder after division by 10^N - //===----------------------------------------------------------===// - - template - static inline char * large(char * p, QuotientAndRemainder x) - { - QuotientAndRemainder y = split(x.quotient); - p = head(p, UnsignedOfSize(y.quotient)); - p = tail(p, y.remainder); - p = tail(p, x.remainder); - return p; - } - - //===----------------------------------------------------------===// - // handle values of "u" that might be >= 10^2*N - // where N is the size of "u" in bytes - //===----------------------------------------------------------===// - - template - static inline char * uitoa(char * p, UInt u) - { - if (u < pow10>(N)) - return head(p, UnsignedOfSize(u)); - QuotientAndRemainder x = split(u); - - return u < pow10>(2 * N) - ? head(p, x) - : large(p, x); - } - - // selected when "u" is one byte - template <> - inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) - { - if (u < 10) - return outDigit(p, u); - else if (u < 100) - return outTwoDigits(p, u); - else - { - p = outDigit(p, u / 100); - p = outTwoDigits(p, u % 100); - return p; - } - } - - //===----------------------------------------------------------===// - // handle unsigned and signed integral operands - //===----------------------------------------------------------===// - - // itoa: handle unsigned integral operands (selected by SFINAE) - template && std::is_integral_v> * = nullptr> - static inline char * itoa(U u, char * p) - { - return convert::uitoa(p, u); - } - - // itoa: handle signed integral operands (selected by SFINAE) - template && std::is_integral_v> * = nullptr> - static inline char * itoa(I i, char * p) - { - // Need "mask" to be filled with a copy of the sign bit. - // If "i" is a negative value, then the result of "operator >>" - // is implementation-defined, though usually it is an arithmetic - // right shift that replicates the sign bit. 
- // Use a conditional expression to be portable, - // a good optimizing compiler generates an arithmetic right shift - // and avoids the conditional branch. - UnsignedOfSize mask = i < 0 ? ~UnsignedOfSize(0) : 0; - // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize. - // Cannot use std::abs() because the result is undefined - // in 2's complement systems for the most-negative value. - // Want to avoid conditional branch for performance reasons since - // CPU branch prediction will be ineffective when negative values - // occur randomly. - // Let "u" be "i" cast to unsigned type UnsignedOfSize. - // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. - // This yields the absolute value with the desired type without - // using a conditional branch and without invoking undefined or - // implementation defined behavior: - UnsignedOfSize u = ((2 * UnsignedOfSize(i)) & ~mask) - UnsignedOfSize(i); - // Unconditionally store a minus sign when producing digits - // in a forward direction and increment the pointer only if - // the value is in fact negative. - // This avoids a conditional branch and is safe because we will - // always produce at least one digit and it will overwrite the - // minus sign when the value is not negative. - *p = '-'; - p += (mask & 1); - p = convert::uitoa(p, u); - return p; - } -} - - -template -static inline char * writeUIntText(T _x, char * p) -{ - int len = digits10(_x); - static_assert(std::is_same_v || std::is_same_v); - using T_ = std::conditional_t< - std::is_same_v, - unsigned __int128, -#if defined(__x86_64__) -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wbit-int-extension" - unsigned _BitInt(256) -#pragma clang diagnostic pop -#else - T -#endif - >; - - T_ x; - T_ hundred(100ULL); - if constexpr (std::is_same_v) - { - x = (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); - } - else - { -#if defined(__x86_64__) - x = (T_(_x.items[T::_impl::little(3)]) << 192) + (T_(_x.items[T::_impl::little(2)]) << 128) + - (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); -#else - x = _x; -#endif - } - - auto * pp = p + len; - while (x >= hundred) - { - const auto i = x % hundred; - x /= hundred; - pp -= 2; - outTwoDigits(pp, i); - } - if (x < 10) - *p = '0' + x; - else - outTwoDigits(p, x); - return p + len; -} - -static inline char * writeLeadingMinus(char * pos) -{ - *pos = '-'; - return pos + 1; -} - -template -static inline char * writeSIntText(T x, char * pos) -{ - static_assert(std::is_same_v || std::is_same_v); - - using UnsignedT = make_unsigned_t; - static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1); - - if (unlikely(x == min_int)) - { - if constexpr (std::is_same_v) - { - const char * res = "-170141183460469231731687303715884105728"; - memcpy(pos, res, strlen(res)); - return pos + strlen(res); - } - else if constexpr (std::is_same_v) - { - const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968"; - memcpy(pos, res, strlen(res)); - return pos + strlen(res); - } - } - - if (x < 0) - { - x = -x; - pos = writeLeadingMinus(pos); - } - return writeUIntText(UnsignedT(x), pos); -} - -} - -template -char * itoa(I i, char * p) -{ - return impl::convert::itoa(i, p); -} - -template <> -inline char * itoa(char8_t i, char * p) -{ - return impl::convert::itoa(uint8_t(i), p); -} - -template <> -inline char * itoa(UInt128 i, char * p) -{ - return impl::writeUIntText(i, p); -} - -template <> -inline char * 
itoa(Int128 i, char * p) -{ - return impl::writeSIntText(i, p); -} - -template <> -inline char * itoa(UInt256 i, char * p) -{ - return impl::writeUIntText(i, p); -} - -template <> -inline char * itoa(Int256 i, char * p) -{ - return impl::writeSIntText(i, p); -} +#define DIGITS_INTEGER_TYPES(M) \ + M(uint8_t) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(UInt128) \ + M(UInt256) +#define INSTANTIATION(T) \ + extern template int digits10(T x); +DIGITS_INTEGER_TYPES(INSTANTIATION) +#undef DIGITS_INTEGER_TYPES +#undef INSTANTIATION diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index ac3e3671ae0..dea369a508a 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -37,6 +37,7 @@ list (APPEND PUBLIC_LIBS clickhouse_dictionaries_embedded clickhouse_parsers ch_contrib::consistent_hashing + common dbms ch_contrib::metrohash ch_contrib::murmurhash From 7fd13df8a5055892d2f8cdc83dcb900c19c87a95 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 7 Mar 2024 17:09:55 +0100 Subject: [PATCH 161/985] check memory limit periodically --- programs/keeper/Keeper.cpp | 10 +++++++++ programs/server/Server.cpp | 1 + src/Common/CgroupsMemoryUsageObserver.cpp | 26 +++++++++++++++++------ src/Common/CgroupsMemoryUsageObserver.h | 7 ++++-- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 8972c82eab8..76dd8cb15a5 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -623,6 +624,15 @@ try buildLoggers(config(), logger()); main_config_reloader->start(); + std::optional observer; + auto cgroups_memory_observer_wait_time = config().getUInt64("keeper_server.cgroups_memory_observer_wait_time", 1); + if (cgroups_memory_observer_wait_time > 0) + { + observer.emplace(std::chrono::seconds(cgroups_memory_observer_wait_time)); + observer->startThread(); + } + + LOG_INFO(log, "Ready for connections."); waitForTerminationRequest(); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c45291ba52c..6b282893dee 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1362,6 +1362,7 @@ try cgroups_memory_usage_observer->setLimits( static_cast(max_server_memory_usage * hard_limit_ratio), static_cast(max_server_memory_usage * soft_limit_ratio)); + cgroups_memory_usage_observer->startThread(); } size_t merges_mutations_memory_usage_soft_limit = new_server_settings.merges_mutations_memory_usage_soft_limit; diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index 9bed6b191e4..5f24c2553b5 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -8,7 +8,9 @@ #include #include #include +#include #include +#include #include #include @@ -48,11 +50,10 @@ CgroupsMemoryUsageObserver::~CgroupsMemoryUsageObserver() void CgroupsMemoryUsageObserver::setLimits(uint64_t hard_limit_, uint64_t soft_limit_) { + std::lock_guard lock(set_limit_mutex); if (hard_limit_ == hard_limit && soft_limit_ == soft_limit) return; - stopThread(); - hard_limit = hard_limit_; soft_limit = soft_limit_; @@ -94,8 +95,6 @@ void CgroupsMemoryUsageObserver::setLimits(uint64_t hard_limit_, uint64_t soft_l } }; - startThread(); - LOG_INFO(log, "Set new limits, soft limit: {}, hard limit: {}", ReadableSize(soft_limit_), ReadableSize(hard_limit_)); } @@ -277,7 +276,7 @@ void 
CgroupsMemoryUsageObserver::stopThread() void CgroupsMemoryUsageObserver::runThread() { setThreadName("CgrpMemUsgObsr"); - + last_memory_amount = getMemoryAmount(); std::unique_lock lock(thread_mutex); while (true) { @@ -286,8 +285,21 @@ void CgroupsMemoryUsageObserver::runThread() try { - uint64_t memory_usage = file.readMemoryUsage(); - processMemoryUsage(memory_usage); + uint64_t memory_limit = getMemoryAmount(); + if (memory_limit != last_memory_amount) + { + last_memory_amount = memory_limit; + /// if we find memory amount changes, we just reload config. + /// Reloading config will check the memory amount again and calculate soft/hard limit again. + auto global_context = getContext()->getGlobalContext(); + global_context->reloadConfig(); + } + std::lock_guard set_limit_lock(set_limit_mutex); + if (soft_limit > 0 && hard_limit > 0) + { + uint64_t memory_usage = file.readMemoryUsage(); + processMemoryUsage(memory_usage); + } } catch (...) { diff --git a/src/Common/CgroupsMemoryUsageObserver.h b/src/Common/CgroupsMemoryUsageObserver.h index 28bf08c82b5..6edf2e2049d 100644 --- a/src/Common/CgroupsMemoryUsageObserver.h +++ b/src/Common/CgroupsMemoryUsageObserver.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -14,7 +15,7 @@ namespace DB /// - When the soft memory limit is hit, drop jemalloc cache. /// - When the hard memory limit is hit, update MemoryTracking metric to throw memory exceptions faster. #if defined(OS_LINUX) -class CgroupsMemoryUsageObserver +class CgroupsMemoryUsageObserver : public WithContext { public: enum class CgroupsVersion @@ -27,6 +28,7 @@ public: ~CgroupsMemoryUsageObserver(); void setLimits(uint64_t hard_limit_, uint64_t soft_limit_); + void startThread(); size_t getHardLimit() const { return hard_limit; } size_t getSoftLimit() const { return soft_limit; } @@ -64,16 +66,17 @@ private: File file; - void startThread(); void stopThread(); void runThread(); void processMemoryUsage(uint64_t usage); std::mutex thread_mutex; + std::mutex set_limit_mutex; std::condition_variable cond; ThreadFromGlobalPool thread; bool quit = false; + uint64_t last_memory_amount; }; #else From bd194aab41401492c5d628269df53e68243a1211 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Thu, 7 Mar 2024 12:55:21 -0400 Subject: [PATCH 162/985] Adds makeDateTime64 function. --- .../functions/date-time-functions.md | 137 ++++++++++++++---- .../functions/other-functions.md | 65 ++++++++- 2 files changed, 170 insertions(+), 32 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 41503abfa2f..12f0c996ce7 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,66 +26,115 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) -- from a year, month and day argument, or -- from a year and day of year argument. +Creates a [Date](../../sql-reference/data-types/date.md) from either one of the following sets of arguments: -**Syntax** +- a year, month, and day. +- a year and day of year. -``` sql -makeDate(year, month, day); -makeDate(year, day_of_year); +### Syntax + +Using a year, month, and day: + +```sql +makeDate(year, month, day) ``` -Alias: -- `MAKEDATE(year, month, day);` -- `MAKEDATE(year, day_of_year);` +Using a year and day of year: -**Arguments** +```sql +makeDate(year, day_of_year) +``` + +### Arguments - `year` — Year. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -**Returned value** +### Returned values -- A date created from the arguments. +A date created from the arguments. Type: [Date](../../sql-reference/data-types/date.md). -**Example** +### Examples Create a Date from a year, month and day: -``` sql +```sql SELECT makeDate(2023, 2, 28) AS Date; ``` -Result: - -``` text -┌───────date─┐ -│ 2023-02-28 │ -└────────────┘ +```response +2023-02-28 ``` -Create a Date from a year and day of year argument: +Create a Date from a year and day of year: ``` sql SELECT makeDate(2023, 42) AS Date; ``` -Result: - -``` text -┌───────date─┐ -│ 2023-02-11 │ -└────────────┘ +```response +2023-02-11 ``` + ## makeDate32 -Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). +Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from either one of the following sets of arguments: + +- a year, month, and day. +- a year and day of year. + +### Syntax + +Using a year, month, and day: + +```sql +makeDate32(year, month, day) +``` + +Using a year and day of year: + +```sql +makeDate32(year, day_of_year) +``` + +### Arguments + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +### Returned values + +A date created from the arguments. + +Type: [Date32](../../sql-reference/data-types/date32.md). + +### Examples + +Create a date from a year, month, and day: + +```sql +SELECT makeDate32(2024, 1, 1); +``` + +```response +2024-01-01 +``` + +Create a Date from a year and day of year: + +``` sql +SELECT makeDate32(2024, 100); +``` + +```response +2024-04-09 +``` ## makeDateTime @@ -129,12 +178,38 @@ Result: ## makeDateTime64 -Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). +Create a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components (year, month, day, hour, minute, second, and optionally, subsecond precision). + +The DateTime64 data type stores both the date and time components in a single 64-bit integer value. The precision of the time component is configurable, allowing you to store time values with subsecond precision up to nanoseconds. 
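To make the effect of the precision argument concrete, here is a small illustrative query (a hypothetical sketch, consistent with the example further below; the full syntax and argument list follow): the same `fraction` value yields different subsecond digits depending on the requested `precision`.

```sql
-- Illustration only: the fraction 779 is interpreted in units of 10^-precision seconds.
SELECT
    makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 3) AS precision_3, -- 2023-05-15 10:30:45.779
    makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5) AS precision_5; -- 2023-05-15 10:30:45.00779
```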
**Syntax**

+```sql
+makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision]])
+```
+
+**Arguments**
+
+- `year` — [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The year component (0-9999).
+- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The month component (1-12).
+- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The day component (1-31).
+- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The hour component (0-23).
+- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The minute component (0-59).
+- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The second component (0-59).
+- `fraction` (optional) [Integer](../../sql-reference/data-types/int-uint.md): The subsecond value, interpreted in units of 10^-`precision` seconds (for example, `fraction = 779` with `precision = 5` gives `.00779`).
+- `precision` (optional) [Integer](../../sql-reference/data-types/int-uint.md): The precision of the subsecond component (0-9, where 0 means no subsecond precision, and 9 means nanosecond precision).
+
+**Returned value**
+
+A date and time value of type [DateTime64](../../sql-reference/data-types/datetime64.md) created from the supplied arguments.
+
+**Example**
+
 ``` sql
-makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]])
+SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5);
+```
+
+```response
+2023-05-15 10:30:45.00779
 ```

 ## timestamp
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 739b688a0d2..10ceedad9aa 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -1866,7 +1866,7 @@ As you can see, `runningAccumulate` merges states for each group of rows separat
 ## joinGet

-The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md).
+Allows you to extract data from a specific column in a Join table, similar to how you would access a value from a dictionary.

 Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.

@@ -1927,6 +1927,69 @@ Result:
 └──────────────────────────────────────────────────┘
 ```

+## joinGetOrNull
+
+Allows you to extract data from a specific column in a Join table, similar to how you would access a value from a dictionary.
+
+Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key.
+
+Only supports tables created with the `ENGINE = Join(ANY, LEFT, <join_keys>)` statement.
+
+### Syntax
+
+```sql
+joinGetOrNull(join_storage_table_name, `value_column`, join_keys)
+```
+
+### Parameters
+
+- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example.
+- `value_column` — name of the column of the table that contains the required data.
+- `join_keys` — list of keys.
+
+### Returned value
+
+Returns a list of values corresponding to the list of keys.
+
+If a certain key does not exist in the source table, then `0` or `null` is returned, based on the [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting.
+
+More info about `join_use_nulls` is available in [Join operation](../../engines/table-engines/special/join.md).
+
+### Example
+
+Input table:
+
+``` sql
+CREATE DATABASE db_test
+CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1
+INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13)
+```
+
+``` text
+┌─id─┬─val─┐
+│ 4 │ 13 │
+│ 2 │ 12 │
+│ 1 │ 11 │
+└────┴─────┘
+```
+
+Query:
+
+``` sql
+SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1
+```
+
+Result:
+
+``` text
+┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐
+│ 0 │
+│ 11 │
+│ 12 │
+│ 0 │
+└──────────────────────────────────────────────────┘
+```
+
 ## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)

 :::note

From 1b2357198d4c1b92fbe3f5a6c0c11be6f1d80a8f Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Thu, 7 Mar 2024 17:03:43 +0000
Subject: [PATCH 163/985] Fix: parallel replicas + prewhere

---
 src/Interpreters/InterpreterSelectQuery.cpp | 5 ++--
 ...03006_parallel_replicas_prewhere.reference | 0
 .../03006_parallel_replicas_prewhere.sql | 29 +++++++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference
 create mode 100644 tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index eaa256a16b0..80e2d5afef7 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -2112,7 +2112,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis
 {
 /// Execute row level filter in prewhere as a part of "move to prewhere" optimization.
 analysis.prewhere_info
- = std::make_shared<PrewhereInfo>(std::move(analysis.filter_info->actions), std::move(analysis.filter_info->column_name));
+ = std::make_shared<PrewhereInfo>(analysis.filter_info->actions, analysis.filter_info->column_name);
 analysis.prewhere_info->prewhere_actions->projectInput(false);
 analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column;
 analysis.prewhere_info->need_filter = true;
@@ -2121,8 +2121,9 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis
 }
 else
 {
+ chassert(analysis.filter_info->actions);
 /// Add row level security actions to prewhere.
- analysis.prewhere_info->row_level_filter = std::move(analysis.filter_info->actions); + analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); analysis.prewhere_info->row_level_filter->projectInput(false); analysis.filter_info = nullptr; diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql new file mode 100644 index 00000000000..819526b5596 --- /dev/null +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql @@ -0,0 +1,29 @@ +DROP POLICY IF EXISTS url_na_log_policy0 ON url_na_log; +DROP TABLE IF EXISTS url_na_log; + +CREATE TABLE url_na_log +( + `SiteId` UInt32, + `DateVisit` Date +) +ENGINE = MergeTree +PRIMARY KEY SiteId +ORDER BY (SiteId, DateVisit) +SETTINGS index_granularity = 1000, min_bytes_for_wide_part = 0; + +CREATE ROW POLICY url_na_log_policy0 ON url_na_log FOR SELECT USING (DateVisit < '2022-08-11') OR (DateVisit > '2022-08-19') TO default; + +INSERT INTO url_na_log +SETTINGS max_insert_block_size = 200000 +SELECT + 209, + CAST('2022-08-09', 'Date') + toIntervalDay(intDiv(number, 10000)) +FROM numbers(130000) +SETTINGS max_insert_block_size = 200000; + +EXPLAIN ESTIMATE +SELECT count() +FROM url_na_log +PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') +SETTINGS max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, allow_experimental_analyzer=0, parallel_replicas_min_number_of_rows_per_replica=10000; + From 0f0ea422f21af8e37aa5c8ef58002d608cde5c77 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 7 Mar 2024 17:05:54 +0000 Subject: [PATCH 164/985] separate limits on number of waiting and executing queries --- .../settings.md | 16 ++- programs/server/Server.cpp | 1 + src/Common/AsyncLoader.cpp | 21 +++- src/Common/AsyncLoader.h | 65 ++++++++++- src/Common/tests/gtest_async_loader.cpp | 66 +++++++++++ src/Core/ServerSettings.h | 1 + src/Interpreters/ProcessList.cpp | 103 +++++++++++++++--- src/Interpreters/ProcessList.h | 41 ++++++- .../System/StorageSystemServerSettings.cpp | 1 + 9 files changed, 293 insertions(+), 22 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 07c9a2b88ab..63fbd9d1964 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -379,6 +379,18 @@ Type: UInt64 Default: 0 +## max_waiting_queries + +Limit on total number of concurrently waiting queries. Execution of a waiting query is blocked while required tables are loading asynchronously (see `async_load_databases`). Note that waiting queries are not counted when `max_concurrent_queries`, `max_concurrent_insert_queries`, `max_concurrent_select_queries`, `max_concurrent_queries_for_user` and `max_concurrent_queries_for_all_users` limits are checked. This correction is done to avoid hitting these limits just after server startup. Zero means unlimited. 
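The current value can be inspected at runtime; a minimal sketch (it relies on the `system.server_settings` table, to which this setting is also exported):

```sql
-- Shows the configured limit and whether it can be changed without a server restart.
SELECT name, value, changeable_without_restart
FROM system.server_settings
WHERE name = 'max_waiting_queries';
```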
+ +:::note +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +::: + +Type: UInt64 + +Default: 0 + ## max_connections Max server connections. @@ -1725,7 +1737,7 @@ Default value: `0.5`. Asynchronous loading of databases and tables. -If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.asynchronous_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up. +If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.asynchronous_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up. Also consider setting a limit `max_waiting_queries` for the total number of waiting queries. If `false`, all databases are loaded when the server starts. @@ -2926,7 +2938,7 @@ Default: 0 ## ignore_empty_sql_security_in_create_view_query {#ignore_empty_sql_security_in_create_view_query} -If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. +If true, ClickHouse doesn't write defaults for empty SQL security statement in CREATE VIEW queries. 
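For illustration, a hypothetical statement this setting applies to (the view and table names are made up): when no explicit `DEFINER` / `SQL SECURITY` clause is written, the setting controls whether a server-side default is persisted into the stored `CREATE` query.

```sql
-- No SQL SECURITY clause is given here; with the setting enabled the stored CREATE
-- query keeps the statement empty, otherwise a server default may be written into it.
CREATE VIEW hits_daily AS
SELECT toDate(event_time) AS day, count() AS hits
FROM hits
GROUP BY day;
```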
:::note This setting is only necessary for the migration period and will become obsolete in 24.4 diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a10f47be0b8..336563665a2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1429,6 +1429,7 @@ try global_context->getProcessList().setMaxSize(new_server_settings.max_concurrent_queries); global_context->getProcessList().setMaxInsertQueriesAmount(new_server_settings.max_concurrent_insert_queries); global_context->getProcessList().setMaxSelectQueriesAmount(new_server_settings.max_concurrent_select_queries); + global_context->getProcessList().setMaxWaitingQueriesAmount(new_server_settings.max_waiting_queries); if (config->has("keeper_server")) global_context->updateKeeperConfiguration(*config); diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index 140194e10b4..80e4c72f1c1 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -140,6 +140,11 @@ void LoadJob::finish() finish_time = std::chrono::system_clock::now(); if (waiters > 0) finished.notify_all(); + else + { + on_waiters_increment = {}; + on_waiters_decrement = {}; + } } void LoadJob::scheduled(UInt64 job_id_) @@ -765,11 +770,25 @@ void AsyncLoader::wait(std::unique_lock & job_lock, const LoadJobPtr if (job->load_status != LoadStatus::PENDING) // Shortcut just to avoid incrementing ProfileEvents return; + if (job->on_waiters_increment) + job->on_waiters_increment(job); + + // WARNING: it is important not to throw below this point to avoid `on_waiters_increment` call w/o matching `on_waiters_decrement` call + Stopwatch watch; job->waiters++; job->finished.wait(job_lock, [&] { return job->load_status != LoadStatus::PENDING; }); job->waiters--; ProfileEvents::increment(ProfileEvents::AsyncLoaderWaitMicroseconds, watch.elapsedMicroseconds()); + + if (job->on_waiters_decrement) + job->on_waiters_decrement(job); + + if (job->waiters == 0) + { + job->on_waiters_increment = {}; + job->on_waiters_decrement = {}; + } } bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock &) @@ -859,7 +878,7 @@ void AsyncLoader::worker(Pool & pool) try { current_load_job = job.get(); - SCOPE_EXIT({ current_load_job = nullptr; }); // Note that recursive job execution is not supported + SCOPE_EXIT({ current_load_job = nullptr; }); // Note that recursive job execution is not supported, but jobs can wait one another job->execute(*this, pool_id, job); exception_from_job = {}; } diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index b1b336d24dc..3f81a36aa96 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -59,7 +59,8 @@ enum class LoadStatus class LoadJob : private boost::noncopyable { public: - template + // NOTE: makeLoadJob() helper should be used instead of direct ctor call + template LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, DFFunc && dependency_failure_, Func && func_) : dependencies(std::forward(dependencies_)) , name(std::move(name_)) @@ -69,6 +70,19 @@ public: , func(std::forward(func_)) {} + // NOTE: makeLoadJob() helper should be used instead of direct ctor call + template + LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, WIFunc && on_waiters_increment_, WDFunc && on_waiters_decrement_, DFFunc && dependency_failure_, Func && func_) + : dependencies(std::forward(dependencies_)) + , name(std::move(name_)) + , execution_pool_id(pool_id_) + , pool_id(pool_id_) + , on_waiters_increment(std::forward(on_waiters_increment_)) + , 
on_waiters_decrement(std::forward(on_waiters_decrement_)) + , dependency_failure(std::forward(dependency_failure_)) + , func(std::forward(func_)) + {} + // Current job status. LoadStatus status() const; std::exception_ptr exception() const; @@ -112,6 +126,13 @@ private: std::atomic execution_pool_id; std::atomic pool_id; + // Handlers that is called by every new waiting thread, just before going to sleep. + // If `on_waiters_increment` throws, then wait is canceled, and corresponding `on_waiters_decrement` will never be called. + // It can be used for counting and limits on number of waiters. + // Note that implementations are called under `LoadJob::mutex` and should be fast. + std::function on_waiters_increment; + std::function on_waiters_decrement; + // Handler for failed or canceled dependencies. // If job needs to be canceled on `dependency` failure, then function should set `cancel` to a specific reason. // Note that implementation should be fast and cannot use AsyncLoader, because it is called under `AsyncLoader::mutex`. @@ -140,8 +161,50 @@ void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & depen void ignoreDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel); template concept LoadJobDependencyFailure = std::invocable; +template concept LoadJobOnWaiters = std::invocable; template concept LoadJobFunc = std::invocable; +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), 0, on_waiters_increment, on_waiters_decrement, std::forward(dependency_failure), std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) +{ + return std::make_shared(dependencies, std::move(name), 0, on_waiters_increment, on_waiters_decrement, std::forward(dependency_failure), std::forward(func)); +} + +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), pool_id, on_waiters_increment, on_waiters_decrement, std::forward(dependency_failure), std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) +{ + return std::make_shared(dependencies, std::move(name), pool_id, on_waiters_increment, on_waiters_decrement, std::forward(dependency_failure), std::forward(func)); +} + +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), 0, on_waiters_increment, on_waiters_decrement, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, String name, 
LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobFunc auto && func) +{ + return std::make_shared(dependencies, std::move(name), 0, on_waiters_increment, on_waiters_decrement, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(LoadJobSet && dependencies, size_t pool_id, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobFunc auto && func) +{ + return std::make_shared(std::move(dependencies), std::move(name), pool_id, on_waiters_increment, on_waiters_decrement, cancelOnDependencyFailure, std::forward(func)); +} + +LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String name, LoadJobOnWaiters auto && on_waiters_increment, LoadJobOnWaiters auto && on_waiters_decrement, LoadJobFunc auto && func) +{ + return std::make_shared(dependencies, std::move(name), pool_id, on_waiters_increment, on_waiters_decrement, cancelOnDependencyFailure, std::forward(func)); +} + + LoadJobPtr makeLoadJob(LoadJobSet && dependencies, String name, LoadJobDependencyFailure auto && dependency_failure, LoadJobFunc auto && func) { return std::make_shared(std::move(dependencies), std::move(name), 0, std::forward(dependency_failure), std::forward(func)); diff --git a/src/Common/tests/gtest_async_loader.cpp b/src/Common/tests/gtest_async_loader.cpp index fc2537abcfc..62a27f259cc 100644 --- a/src/Common/tests/gtest_async_loader.cpp +++ b/src/Common/tests/gtest_async_loader.cpp @@ -643,6 +643,72 @@ TEST(AsyncLoader, CustomDependencyFailure) ASSERT_EQ(good_count.load(), 3); } +TEST(AsyncLoader, WaitersLimit) +{ + AsyncLoaderTest t(16); + + std::atomic waiters_total{0}; + int waiters_limit = 5; + auto waiters_inc = [&] (const LoadJobPtr &) { + int value = waiters_total.load(); + while (true) + { + if (value >= waiters_limit) + throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "Too many waiters: {}", value); + if (waiters_total.compare_exchange_strong(value, value + 1)) + break; + } + }; + auto waiters_dec = [&] (const LoadJobPtr &) { + waiters_total.fetch_sub(1); + }; + + std::barrier sync(2); + t.loader.start(); + + auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) { + sync.arrive_and_wait(); // (A) + }; + + auto job = makeLoadJob({}, "job", waiters_inc, waiters_dec, job_func); + auto task = t.schedule({job}); + + std::atomic failure{0}; + std::atomic success{0}; + std::vector waiters; + waiters.reserve(10); + auto waiter = [&] { + try + { + t.loader.wait(job); + success.fetch_add(1); + } + catch(...) + { + failure.fetch_add(1); + } + }; + + for (int i = 0; i < 10; i++) + waiters.emplace_back(waiter); + + while (failure.load() != 5) + std::this_thread::yield(); + + ASSERT_EQ(job->waitersCount(), 5); + + sync.arrive_and_wait(); // (A) + + for (auto & thread : waiters) + thread.join(); + + ASSERT_EQ(success.load(), 5); + ASSERT_EQ(failure.load(), 5); + ASSERT_EQ(waiters_total.load(), 0); + + t.loader.wait(); +} + TEST(AsyncLoader, TestConcurrency) { AsyncLoaderTest t(10); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index c82255ec59c..129b1016fca 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -64,6 +64,7 @@ namespace DB M(UInt64, max_concurrent_queries, 0, "Maximum number of concurrently executed queries. Zero means unlimited.", 0) \ M(UInt64, max_concurrent_insert_queries, 0, "Maximum number of concurrently INSERT queries. 
Zero means unlimited.", 0) \ M(UInt64, max_concurrent_select_queries, 0, "Maximum number of concurrently SELECT queries. Zero means unlimited.", 0) \ + M(UInt64, max_waiting_queries, 0, "Maximum number of concurrently waiting queries blocked due to `async_load_databases`. Note that waiting queries are not considered by `max_concurrent_*queries*` limits. Zero means unlimited.", 0) \ \ M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size ro RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \ M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 3bd7b2d4206..f451d561e60 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -83,25 +83,31 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q IAST::QueryKind query_kind = ast->getQueryKind(); const auto queue_max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); - if (!is_unlimited_query && max_size && processes.size() >= max_size) + UInt64 waiting_queries = waiting_queries_amount.load(); + if (!is_unlimited_query && max_size && processes.size() >= max_size + waiting_queries) { if (queue_max_wait_ms) LOG_WARNING(getLogger("ProcessList"), "Too many simultaneous queries, will wait {} ms.", queue_max_wait_ms); - if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), [&]{ return processes.size() < max_size; })) - throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries. Maximum: {}", max_size); + if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), + [&]{ waiting_queries = waiting_queries_amount.load(); return processes.size() < max_size + waiting_queries; })) + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, + "Too many simultaneous queries. Maximum: {}{}", + max_size, waiting_queries == 0 ? "" : fmt::format(", waiting: {}", waiting_queries)); } if (!is_unlimited_query) { QueryAmount amount = getQueryKindAmount(query_kind); - if (max_insert_queries_amount && query_kind == IAST::QueryKind::Insert && amount >= max_insert_queries_amount) + UInt64 waiting_inserts = waiting_insert_queries_amount.load(); + UInt64 waiting_selects = waiting_select_queries_amount.load(); + if (max_insert_queries_amount && query_kind == IAST::QueryKind::Insert && amount >= max_insert_queries_amount + waiting_inserts) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, - "Too many simultaneous insert queries. Maximum: {}, current: {}", - max_insert_queries_amount, amount); - if (max_select_queries_amount && query_kind == IAST::QueryKind::Select && amount >= max_select_queries_amount) + "Too many simultaneous insert queries. Maximum: {}, current: {}{}", + max_insert_queries_amount, amount, waiting_inserts == 0 ? "" : fmt::format(", waiting: {}", waiting_inserts)); + if (max_select_queries_amount && query_kind == IAST::QueryKind::Select && amount >= max_select_queries_amount + waiting_selects) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, - "Too many simultaneous select queries. Maximum: {}, current: {}", - max_select_queries_amount, amount); + "Too many simultaneous select queries. Maximum: {}, current: {}{}", + max_select_queries_amount, amount, waiting_selects == 0 ? 
"" : fmt::format(", waiting: {}", waiting_selects)); } { @@ -124,10 +130,12 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q * once is already processing 50+ concurrent queries (including analysts or any other users). */ + waiting_queries = waiting_queries_amount.load(); if (!is_unlimited_query && settings.max_concurrent_queries_for_all_users - && processes.size() >= settings.max_concurrent_queries_for_all_users) + && processes.size() >= settings.max_concurrent_queries_for_all_users + waiting_queries_amount) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for all users. " - "Current: {}, maximum: {}", processes.size(), settings.max_concurrent_queries_for_all_users.toString()); + "Current: {}, maximum: {}{}", processes.size(), settings.max_concurrent_queries_for_all_users.toString(), + waiting_queries == 0 ? "" : fmt::format(", waiting: {}", waiting_queries)); } /** Why we use current user? @@ -145,13 +153,15 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (user_process_list != user_to_queries.end()) { + UInt64 user_waiting_queries = user_process_list->second.waiting_queries_amount.load(); if (!is_unlimited_query && settings.max_concurrent_queries_for_user - && user_process_list->second.queries.size() >= settings.max_concurrent_queries_for_user) + && user_process_list->second.queries.size() >= settings.max_concurrent_queries_for_user + user_waiting_queries) throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for user {}. " - "Current: {}, maximum: {}", + "Current: {}, maximum: {}{}", client_info.current_user, user_process_list->second.queries.size(), - settings.max_concurrent_queries_for_user.toString()); + settings.max_concurrent_queries_for_user.toString(), + user_waiting_queries == 0 ? "" : fmt::format(", waiting: {}", user_waiting_queries)); auto running_query = user_process_list->second.queries.find(client_info.current_query_id); @@ -745,4 +755,69 @@ ProcessList::QueryAmount ProcessList::getQueryKindAmount(const IAST::QueryKind & return found->second; } +void ProcessList::increaseWaitingQueryAmount(const QueryStatusPtr & status) +{ + UInt64 limit = max_waiting_queries_amount.load(); + UInt64 value = waiting_queries_amount.load(); + while (true) + { + if (value >= limit) + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, + "Too many simultaneous waiting queries. Maximum: {}, waiting: {}", + limit, value); + if (waiting_queries_amount.compare_exchange_strong(value, value + 1)) + break; + } + + // WARNING: After this point we should not throw, otherwise corresponding `decreaseWaitingQueryAmount` will not be called. 
+ + // Update query kind counters + if (status->query_kind == IAST::QueryKind::Insert) + waiting_insert_queries_amount.fetch_add(1); + if (status->query_kind == IAST::QueryKind::Select) + waiting_select_queries_amount.fetch_add(1); + + // Update per-user counter + status->getUserProcessList()->waiting_queries_amount.fetch_add(1); + + // We have to notify because some queries might be waiting on `have_space` + // and this query leaves its space by transitioning to waiting state + have_space.notify_all(); +} + +void ProcessList::decreaseWaitingQueryAmount(const QueryStatusPtr & status) +{ + if (status->getUserProcessList()->waiting_queries_amount.fetch_sub(1) == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong insert waiting query amount for user: decrease to negative"); + + if (status->query_kind == IAST::QueryKind::Insert && waiting_insert_queries_amount.fetch_sub(1) == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong insert waiting query amount: decrease to negative"); + + if (status->query_kind == IAST::QueryKind::Select && waiting_select_queries_amount.fetch_sub(1) == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong select waiting query amount: decrease to negative"); + + if (waiting_queries_amount.fetch_sub(1) == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong waiting query amount: decrease to negative"); +} + +void ProcessList::incrementWaiters() +{ + ContextPtr context = CurrentThread::getQueryContext(); + QueryStatusPtr status = context->getProcessListElement(); + + // Query became "waiting" with the first thread that waits + if (status->waiting_threads.fetch_add(1) == 0) + increaseWaitingQueryAmount(status); +} + +void ProcessList::decrementWaiters() +{ + ContextPtr context = CurrentThread::getQueryContext(); + QueryStatusPtr status = context->getProcessListElement(); + + // Query became "non-waiting" with the last thread that no longer waits + if (status->waiting_threads.fetch_sub(1) == 1) + decreaseWaitingQueryAmount(status); +} + } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 1c253f562e8..75350627698 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -42,10 +42,6 @@ class ThreadStatus; class ProcessListEntry; -/** List of currently executing queries. - * Also implements limit on their number. - */ - /** Information of process list element. * To output in SHOW PROCESSLIST query. Does not contain any complex objects, that do something on copy or destructor. */ @@ -114,8 +110,13 @@ protected: /// Including EndOfStream or Exception. std::atomic is_all_data_sent { false }; + /// Number of threads for the query that are waiting for load jobs + std::atomic waiting_threads{0}; + + /// For initialization of ProcessListForUser during process insertion. void setUserProcessList(ProcessListForUser * user_process_list_); /// Be careful using it. For example, queries field of ProcessListForUser could be modified concurrently. + ProcessListForUser * getUserProcessList() { return user_process_list; } const ProcessListForUser * getUserProcessList() const { return user_process_list; } /// Sets an entry in the ProcessList associated with this QueryStatus. @@ -283,6 +284,9 @@ struct ProcessListForUser /// Count network usage for all simultaneously running queries of single user. ThrottlerPtr user_throttler; + /// Number of queries waiting on load jobs + std::atomic waiting_queries_amount{0}; + ProcessListForUserInfo getInfo(bool get_profile_events = false) const; /// Clears MemoryTracker for the user. 
@@ -341,6 +345,9 @@ protected: }; +/** List of currently executing queries. + * Also implements limit on their number. + */ class ProcessList : public ProcessListBase { public: @@ -399,10 +406,21 @@ protected: /// amount of queries by query kind. QueryKindAmounts query_kind_amounts; + /// limit for waiting queries. 0 means no limit. Otherwise, when limit exceeded, an exception is thrown. + std::atomic max_waiting_queries_amount{0}; + + /// amounts of waiting queries + std::atomic waiting_queries_amount{0}; + std::atomic waiting_insert_queries_amount{0}; + std::atomic waiting_select_queries_amount{0}; + void increaseQueryKindAmount(const IAST::QueryKind & query_kind); void decreaseQueryKindAmount(const IAST::QueryKind & query_kind); QueryAmount getQueryKindAmount(const IAST::QueryKind & query_kind) const; + void increaseWaitingQueryAmount(const QueryStatusPtr & status); + void decreaseWaitingQueryAmount(const QueryStatusPtr & status); + public: using EntryPtr = std::shared_ptr; @@ -458,6 +476,21 @@ public: return max_select_queries_amount; } + void setMaxWaitingQueriesAmount(UInt64 max_waiting_queries_amount_) + { + max_waiting_queries_amount.store(max_waiting_queries_amount_); + // NOTE: We cannot cancel waiting queries when limit is lowered. They have to wait anyways, but new queries will be canceled instead of waiting. + } + + size_t getMaxWaitingQueriesAmount() const + { + return max_waiting_queries_amount.load(); + } + + // Handlers for AsyncLoader waiters + void incrementWaiters(); + void decrementWaiters(); + /// Try call cancel() for input and output streams of query with specified id and user CancellationCode sendCancelToQuery(const String & current_query_id, const String & current_user, bool kill = false); CancellationCode sendCancelToQuery(QueryStatusPtr elem, bool kill = false); diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index f390985546b..bf14f757a19 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -70,6 +70,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context {"max_concurrent_queries", {std::to_string(context->getProcessList().getMaxSize()), ChangeableWithoutRestart::Yes}}, {"max_concurrent_insert_queries", {std::to_string(context->getProcessList().getMaxInsertQueriesAmount()), ChangeableWithoutRestart::Yes}}, {"max_concurrent_select_queries", {std::to_string(context->getProcessList().getMaxSelectQueriesAmount()), ChangeableWithoutRestart::Yes}}, + {"max_waiting_queries", {std::to_string(context->getProcessList().getMaxWaitingQueriesAmount()), ChangeableWithoutRestart::Yes}}, {"background_buffer_flush_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundBufferFlushSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}}, {"background_schedule_pool_size", {std::to_string(CurrentMetrics::get(CurrentMetrics::BackgroundSchedulePoolSize)), ChangeableWithoutRestart::IncreaseOnly}}, From f4fc65449cc3ace36f33323600fd1a47fbfb9736 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 8 Mar 2024 01:20:50 +0800 Subject: [PATCH 165/985] Add another example dataset for presenting usage --- .../example-datasets/tw-weather.md | 293 ++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 docs/en/getting-started/example-datasets/tw-weather.md diff --git a/docs/en/getting-started/example-datasets/tw-weather.md b/docs/en/getting-started/example-datasets/tw-weather.md new 
file mode 100644
index 00000000000..e5f16c403d5
--- /dev/null
+++ b/docs/en/getting-started/example-datasets/tw-weather.md
@@ -0,0 +1,293 @@
+---
+slug: /en/getting-started/example-datasets/tw-weather
+sidebar_label: Taiwan Historical Weather Datasets
+sidebar_position: 1
+description: 131 million rows of weather observation data for the last 128 years
+---
+
+# Taiwan Historical Weather Datasets
+
+This dataset contains historical meteorological observations for the last 128 years. Each row is a measurement for a weather station at a point in time.
+
+The origin of this dataset is available [here](https://github.com/Raingel/historical_weather) and the list of weather station numbers can be found [here](https://github.com/Raingel/weather_station_list).
+
+> The sources of the meteorological datasets include the meteorological stations established by the Central Weather Administration (station codes beginning with C0, C1, and 4) and the agricultural meteorological stations belonging to the Council of Agriculture (station codes other than those mentioned above):
+
+ - StationId
+ - MeasuredDate, the observation time
+ - StnPres, the station air pressure
+ - SeaPres, the sea level pressure
+ - Td, the dew point temperature
+ - RH, the relative humidity
+ - Other elements where available
+
+## Downloading the data
+
+- A [pre-processed version](#pre-processed-data) of the data for ClickHouse, which has been cleaned, re-structured, and enriched. This dataset covers the years from 1896 to 2023.
+- [Download the original raw data](#original-raw-data) and convert it to the format required by ClickHouse. Users who want to add their own columns may wish to explore or extend this approach.
+
+### Pre-processed data
+
+The dataset has also been re-structured from one measurement per line to one row per weather station id and measured date, i.e.
+
+```csv
+StationId,MeasuredDate,StnPres,Tx,RH,WS,WD,WSGust,WDGust,Precp,GloblRad,TxSoil0cm,TxSoil5cm,TxSoil20cm,TxSoil50cm,TxSoil100cm,SeaPres,Td,PrecpHour,SunShine,TxSoil10cm,EvapA,Visb,UVI,Cloud Amount,TxSoil30cm,TxSoil200cm,TxSoil300cm,TxSoil500cm,VaporPressure
+C0X100,2016-01-01 01:00:00,1022.1,16.1,72,1.1,8.0,,,,,,,,,,,,,,,,,,,,,,,
+C0X100,2016-01-01 02:00:00,1021.6,16.0,73,1.2,358.0,,,,,,,,,,,,,,,,,,,,,,,
+C0X100,2016-01-01 03:00:00,1021.3,15.8,74,1.5,353.0,,,,,,,,,,,,,,,,,,,,,,,
+C0X100,2016-01-01 04:00:00,1021.2,15.8,74,1.7,8.0,,,,,,,,,,,,,,,,,,,,,,,
+```
+
+This makes the data easier to query and keeps the resulting table less sparse; some elements are null because they are not available to be measured at that weather station.
+
+This dataset is available in the following Google Cloud Storage location. Either download the dataset to your local filesystem (and insert it with the ClickHouse client) or insert it directly into ClickHouse (see [Inserting from URL](#inserting-from-url)).
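+
+For example, before downloading anything you can optionally let ClickHouse infer the schema of the hosted CSV. This is only a quick sanity-check sketch; it uses the same URL as the [Inserting from URL](#inserting-from-url) section below and assumes that URL is reachable from your server:
+
+```sql
+-- Optional sanity check (assumes the hosted CSV is reachable from the server)
+DESCRIBE TABLE url('https://storage.googleapis.com/taiwan-weather-observaiton-datasets/daily_weather_preprocessed_1896_2023.csv', 'CSVWithNames');
+```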
+
+To download:
+
+```bash
+wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/preprocessed_weather_daily_1896_2023.tar.gz
+
+# Option: Validate the checksum
+md5sum preprocessed_weather_daily_1896_2023.tar.gz
+# Checksum should be equal to: 11b484f5bd9ddafec5cfb131eb2dd008
+
+tar -xzvf preprocessed_weather_daily_1896_2023.tar.gz
+daily_weather_preprocessed_1896_2023.csv
+
+# Option: Validate the checksum
+md5sum daily_weather_preprocessed_1896_2023.csv
+# Checksum should be equal to: 1132248c78195c43d93f843753881754
+```
+
+### Original raw data
+
+The following section describes how to download the original raw data so that you can transform and convert it as you want.
+
+#### Download
+
+To download the original raw data:
+
+```bash
+mkdir tw_raw_weather_data && cd tw_raw_weather_data
+
+wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/raw_data_weather_daily_1896_2023.tar.gz
+
+# Option: Validate the checksum
+md5sum raw_data_weather_daily_1896_2023.tar.gz
+# Checksum should be equal to: b66b9f137217454d655e3004d7d1b51a
+
+tar -xzvf raw_data_weather_daily_1896_2023.tar.gz
+466920_1928.csv
+466920_1929.csv
+466920_1930.csv
+466920_1931.csv
+...
+
+# Option: Validate the checksum
+cat *.csv | md5sum
+# Checksum should be equal to: b26db404bf84d4063fac42e576464ce1
+```
+
+#### Retrieve the Taiwan weather stations
+
+```bash
+wget -O weather_sta_list.csv https://github.com/Raingel/weather_station_list/raw/main/data/weather_sta_list.csv
+
+# Option: Convert the UTF-8-BOM to UTF-8 encoding
+sed -i '1s/^\xEF\xBB\xBF//' weather_sta_list.csv
+```
+
+## Create table schema
+
+Create the MergeTree table in ClickHouse (from the ClickHouse client).
+
+```sql
+CREATE TABLE tw_weather_data (
+    StationId String null,
+    MeasuredDate DateTime64,
+    StnPres Float64 null,
+    SeaPres Float64 null,
+    Tx Float64 null,
+    Td Float64 null,
+    RH Float64 null,
+    WS Float64 null,
+    WD Float64 null,
+    WSGust Float64 null,
+    WDGust Float64 null,
+    Precp Float64 null,
+    PrecpHour Float64 null,
+    SunShine Float64 null,
+    GloblRad Float64 null,
+    TxSoil0cm Float64 null,
+    TxSoil5cm Float64 null,
+    TxSoil10cm Float64 null,
+    TxSoil20cm Float64 null,
+    TxSoil50cm Float64 null,
+    TxSoil100cm Float64 null,
+    TxSoil30cm Float64 null,
+    TxSoil200cm Float64 null,
+    TxSoil300cm Float64 null,
+    TxSoil500cm Float64 null,
+    VaporPressure Float64 null,
+    UVI Float64 null,
+    "Cloud Amount" Float64 null,
+    EvapA Float64 null,
+    Visb Float64 null
+)
+ENGINE = MergeTree
+ORDER BY (MeasuredDate);
+```
+
+## Inserting into ClickHouse
+
+### Inserting from local file
+
+Data can be inserted from a local file as follows (from the ClickHouse client):
+
+```sql
+INSERT INTO tw_weather_data FROM INFILE '/path/to/daily_weather_preprocessed_1896_2023.csv'
+```
+
+where `/path/to` represents the path to the local file on disk.
+
+The sample response output after inserting the data into ClickHouse is as follows:
+
+```response
+Query id: 90e4b524-6e14-4855-817c-7e6f98fbeabb
+
+Ok.
+131985329 rows in set. Elapsed: 71.770 sec. Processed 131.99 million rows, 10.06 GB (1.84 million rows/s., 140.14 MB/s.)
+Peak memory usage: 583.23 MiB.
+``` + +### Inserting from URL + +```sql +INSERT INTO tw_weather_data SELECT * +FROM url('https://storage.googleapis.com/taiwan-weather-observaiton-datasets/daily_weather_preprocessed_1896_2023.csv', 'CSVWithNames') + +``` +To know how to speed this up, please see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2). + +## Check data rows and sizes + +1. Let's see how many rows are inserted: + +```sql +SELECT formatReadableQuantity(count()) +FROM tw_weather_data; +``` + +```response +┌─formatReadableQuantity(count())─┐ +│ 131.99 million │ +└─────────────────────────────────┘ +``` + +2. Let's see how much disk space are used for this table: + +```sql +SELECT + formatReadableSize(sum(bytes)) AS disk_size, + formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size +FROM system.parts +WHERE (`table` = 'tw_weather_data') AND active +``` + +```response +┌─disk_size─┬─uncompressed_size─┐ +│ 2.13 GiB │ 32.94 GiB │ +└───────────┴───────────────────┘ +``` + +## Sample queries + +### Q1: Retrieve the highest dew point temperature for each weather station in the specific year + +```sql +SELECT + StationId, + max(Td) AS max_td +FROM tw_weather_data +WHERE (year(MeasuredDate) = 2023) AND (Td IS NOT NULL) +GROUP BY StationId + +┌─StationId─┬─max_td─┐ +│ 466940 │ 1 │ +│ 467300 │ 1 │ +│ 467540 │ 1 │ +│ 467490 │ 1 │ +│ 467080 │ 1 │ +│ 466910 │ 1 │ +│ 467660 │ 1 │ +│ 467270 │ 1 │ +│ 467350 │ 1 │ +│ 467571 │ 1 │ +│ 466920 │ 1 │ +│ 467650 │ 1 │ +│ 467550 │ 1 │ +│ 467480 │ 1 │ +│ 467610 │ 1 │ +│ 467050 │ 1 │ +│ 467590 │ 1 │ +│ 466990 │ 1 │ +│ 467060 │ 1 │ +│ 466950 │ 1 │ +│ 467620 │ 1 │ +│ 467990 │ 1 │ +│ 466930 │ 1 │ +│ 467110 │ 1 │ +│ 466881 │ 1 │ +│ 467410 │ 1 │ +│ 467441 │ 1 │ +│ 467420 │ 1 │ +│ 467530 │ 1 │ +│ 466900 │ 1 │ +└───────────┴────────┘ + +30 rows in set. Elapsed: 0.045 sec. Processed 6.41 million rows, 187.33 MB (143.92 million rows/s., 4.21 GB/s.) +``` + +### Q2: Raw data fetching with the specific duration time range, fields and weather station + +```sql +SELECT + StnPres, + SeaPres, + Tx, + Td, + RH, + WS, + WD, + WSGust, + WDGust, + Precp, + PrecpHour +FROM tw_weather_data +WHERE (StationId = 'C0UB10') AND (MeasuredDate >= '2023-12-23') AND (MeasuredDate < '2023-12-24') +ORDER BY MeasuredDate ASC +LIMIT 10 +``` + +```response +┌─StnPres─┬─SeaPres─┬───Tx─┬───Td─┬─RH─┬──WS─┬──WD─┬─WSGust─┬─WDGust─┬─Precp─┬─PrecpHour─┐ +│ 1029.5 │ ᴺᵁᴸᴸ │ 11.8 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 271 │ 5.5 │ 275 │ -99.8 │ -99.8 │ +│ 1029.8 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 289 │ 5.5 │ 308 │ -99.8 │ -99.8 │ +│ 1028.6 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 79 │ 2.3 │ 251 │ 6.1 │ 289 │ -99.8 │ -99.8 │ +│ 1028.2 │ ᴺᵁᴸᴸ │ 13 │ ᴺᵁᴸᴸ │ 75 │ 4.3 │ 312 │ 7.5 │ 316 │ -99.8 │ -99.8 │ +│ 1027.8 │ ᴺᵁᴸᴸ │ 11.1 │ ᴺᵁᴸᴸ │ 89 │ 7.1 │ 310 │ 11.6 │ 322 │ -99.8 │ -99.8 │ +│ 1027.8 │ ᴺᵁᴸᴸ │ 11.6 │ ᴺᵁᴸᴸ │ 90 │ 3.1 │ 269 │ 10.7 │ 295 │ -99.8 │ -99.8 │ +│ 1027.9 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 89 │ 4.7 │ 296 │ 8.1 │ 310 │ -99.8 │ -99.8 │ +│ 1028.2 │ ᴺᵁᴸᴸ │ 12.2 │ ᴺᵁᴸᴸ │ 94 │ 2.5 │ 246 │ 7.1 │ 283 │ -99.8 │ -99.8 │ +│ 1028.4 │ ᴺᵁᴸᴸ │ 12.5 │ ᴺᵁᴸᴸ │ 94 │ 3.1 │ 265 │ 4.8 │ 297 │ -99.8 │ -99.8 │ +│ 1028.3 │ ᴺᵁᴸᴸ │ 13.6 │ ᴺᵁᴸᴸ │ 91 │ 1.2 │ 273 │ 4.4 │ 256 │ -99.8 │ -99.8 │ +└─────────┴─────────┴──────┴──────┴────┴─────┴─────┴────────┴────────┴───────┴───────────┘ + +10 rows in set. Elapsed: 0.009 sec. Processed 91.70 thousand rows, 2.33 MB (9.67 million rows/s., 245.31 MB/s.) 
+``` + +## Credits + +We would like to acknowledge the efforts of the Central Weather Administration and Agricultural Meteorological Observation Network (Station) of the Council of Agriculture for preparing, cleaning, and distributing this dataset. We appreciate your efforts. + +Ou, J.-H., Kuo, C.-H., Wu, Y.-F., Lin, G.-C., Lee, M.-H., Chen, R.-K., Chou, H.-P., Wu, H.-Y., Chu, S.-C., Lai, Q.-J., Tsai, Y.-C., Lin, C.-C., Kuo, C.-C., Liao, C.-T., Chen, Y.-N., Chu, Y.-W., Chen, C.-Y., 2023. Application-oriented deep learning model for early warning of rice blast in Taiwan. Ecological Informatics 73, 101950. https://doi.org/10.1016/j.ecoinf.2022.101950 [13/12/2022] From a2bdb054ca5a9c205f5d0442313d3c9f4d22cda7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 7 Mar 2024 17:31:37 +0000 Subject: [PATCH 166/985] Cleanup --- src/Interpreters/InterpreterSelectQuery.cpp | 4 +--- .../queries/0_stateless/03006_parallel_replicas_prewhere.sql | 4 +++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 80e2d5afef7..987bf560add 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2111,8 +2111,7 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. - analysis.prewhere_info - = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); + analysis.prewhere_info = std::make_shared(analysis.filter_info->actions, analysis.filter_info->column_name); analysis.prewhere_info->prewhere_actions->projectInput(false); analysis.prewhere_info->remove_prewhere_column = analysis.filter_info->do_remove_column; analysis.prewhere_info->need_filter = true; @@ -2121,7 +2120,6 @@ void InterpreterSelectQuery::applyFiltersToPrewhereInAnalysis(ExpressionAnalysis } else { - chassert(analysis.filter_info->actions); /// Add row level security actions to prewhere. 
analysis.prewhere_info->row_level_filter = analysis.filter_info->actions; analysis.prewhere_info->row_level_column_name = std::move(analysis.filter_info->column_name); diff --git a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql index 819526b5596..4b84646c034 100644 --- a/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql +++ b/tests/queries/0_stateless/03006_parallel_replicas_prewhere.sql @@ -25,5 +25,7 @@ EXPLAIN ESTIMATE SELECT count() FROM url_na_log PREWHERE (DateVisit >= toFixedString('2022-08-10', 10)) AND (DateVisit <= '2022-08-20') -SETTINGS max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, allow_experimental_analyzer=0, parallel_replicas_min_number_of_rows_per_replica=10000; +SETTINGS max_block_size = 1048576, max_threads = 1, allow_experimental_parallel_reading_from_replicas = 1, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', max_parallel_replicas = 3, parallel_replicas_min_number_of_rows_per_replica=10000; +DROP POLICY url_na_log_policy0 ON url_na_log; +DROP TABLE url_na_log; From 38cbc2c6c40541cc916bc591cd68b7eef70b1162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 7 Mar 2024 18:57:16 +0100 Subject: [PATCH 167/985] Restore digits --- base/base/itoa.cpp | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index 9fefc9f0f07..ef844ff68a8 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -34,42 +34,37 @@ template -int digits10(T x) +inline int digits10(T x) { - if (x < T(10ULL)) + if (x < 10ULL) return 1; - if (x < T(100ULL)) + if (x < 100ULL) return 2; - if constexpr (sizeof(T) == 1) + if (x < 1000ULL) return 3; - else + + if (x < 1000000000000ULL) { - if (x < T(1000ULL)) - return 3; - - if (x < T(1000000000000ULL)) + if (x < 100000000ULL) { - if (x < T(100000000ULL)) + if (x < 1000000ULL) { - if (x < T(1000000ULL)) - { - if (x < T(10000ULL)) - return 4; - else - return 5 + (x >= T(100000ULL)); - } - - return 7 + (x >= T(10000000ULL)); + if (x < 10000ULL) + return 4; + else + return 5 + (x >= 100000ULL); } - if (x < T(10000000000ULL)) - return 9 + (x >= T(1000000000ULL)); - - return 11 + (x >= T(100000000000ULL)); + return 7 + (x >= 10000000ULL); } - return 12 + digits10(x / T(1000000000000ULL)); + if (x < 10000000000ULL) + return 9 + (x >= 1000000000ULL); + + return 11 + (x >= 100000000000ULL); } + + return 12 + digits10(x / 1000000000000ULL); } From a00a1fd7b4c9a4e83eeb746da781b27c74dcd0b2 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Thu, 7 Mar 2024 14:45:15 -0400 Subject: [PATCH 168/985] Adds readWKT docs. 
--- .../en/sql-reference/functions/geo/polygon.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md index 4a8653965c2..35e2280e5cc 100644 --- a/docs/en/sql-reference/functions/geo/polygon.md +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -53,6 +53,62 @@ String starting with `POLYGON` Polygon +## readWKTPoint + +The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format. + +### Syntax + +```sql +readWKTPoint(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Point geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the Point geometry. + +### Example + +```sql +SELECT readWKTPoint('POINT (1.2 3.4)'); +``` + +```response +(1.2,3.4) +``` + +## readWKTRing + +Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format. + +### Syntax + +```sql +readWKTRing(wkt_string) +``` + +### Arguments + +- `wkt_string`: The input WKT string representing a Polygon geometry. + +### Returned value + +The function returns a ClickHouse internal representation of the ring (closed linestring) geometry. + +### Example + +```sql +SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)'); +``` + +```response +[(1,1),(2,2),(3,3),(1,1)] +``` + ## polygonsWithinSpherical Returns true or false depending on whether or not one polygon lies completely inside another polygon. Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html From 444595ac576438c9d0a259debf776187ddd3fcce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 7 Mar 2024 20:00:07 +0100 Subject: [PATCH 169/985] More speed please Mr. compiler --- base/base/itoa.cpp | 165 +++++++++++++++++++++++++++++++-------------- 1 file changed, 114 insertions(+), 51 deletions(-) diff --git a/base/base/itoa.cpp b/base/base/itoa.cpp index ef844ff68a8..08912edf3ea 100644 --- a/base/base/itoa.cpp +++ b/base/base/itoa.cpp @@ -72,7 +72,7 @@ namespace { template -static constexpr T pow10(size_t x) +ALWAYS_INLINE inline constexpr T pow10(size_t x) { return x ? 
10 * pow10(x - 1) : 1; } @@ -143,7 +143,7 @@ struct QuotientAndRemainder }; template -QuotientAndRemainder static inline split(UnsignedOfSize value) +QuotientAndRemainder inline split(UnsignedOfSize value) { constexpr DivisionBy10PowN division; @@ -154,7 +154,7 @@ QuotientAndRemainder static inline split(UnsignedOfSize value) } -static inline char * outDigit(char * p, uint8_t value) +ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value) { *p = '0' + value; ++p; @@ -176,7 +176,7 @@ static const char digits[201] = "00010203040506070809" "80818283848586878889" "90919293949596979899"; -static inline char * outTwoDigits(char * p, uint8_t value) +ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value) { memcpy(p, &digits[value * 2], 2); p += 2; @@ -187,9 +187,9 @@ static inline char * outTwoDigits(char * p, uint8_t value) namespace convert { template -static char * head(char * p, UInt u); +char * head(char * p, UInt u); template -static char * tail(char * p, UInt u); +char * tail(char * p, UInt u); //===----------------------------------------------------------===// // head: find most significant digit, skip leading zeros @@ -198,7 +198,7 @@ static char * tail(char * p, UInt u); // "x" contains quotient and remainder after division by 10^N // quotient is less than 10^N template -static inline char * head(char * p, QuotientAndRemainder x) +ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder x) { p = head(p, UnsignedOfSize(x.quotient)); p = tail(p, x.remainder); @@ -207,14 +207,14 @@ static inline char * head(char * p, QuotientAndRemainder x) // "u" is less than 10^2*N template -static inline char * head(char * p, UInt u) +ALWAYS_INLINE inline char * head(char * p, UInt u) { return u < pow10>(N) ? head(p, UnsignedOfSize(u)) : head(p, split(u)); } // recursion base case, selected when "u" is one byte template <> -inline char * head, 1>(char * p, UnsignedOfSize<1> u) +ALWAYS_INLINE inline char * head, 1>(char * p, UnsignedOfSize<1> u) { return u < 10 ? 
outDigit(p, u) : outTwoDigits(p, u); } @@ -225,7 +225,7 @@ inline char * head, 1>(char * p, UnsignedOfSize<1> u) // recursive step, "u" is less than 10^2*N template -static inline char * tail(char * p, UInt u) +ALWAYS_INLINE inline char * tail(char * p, UInt u) { QuotientAndRemainder x = split(u); p = tail(p, UnsignedOfSize(x.quotient)); @@ -235,7 +235,7 @@ static inline char * tail(char * p, UInt u) // recursion base case, selected when "u" is one byte template <> -inline char * tail, 1>(char * p, UnsignedOfSize<1> u) +ALWAYS_INLINE inline char * tail, 1>(char * p, UnsignedOfSize<1> u) { return outTwoDigits(p, u); } @@ -244,9 +244,8 @@ inline char * tail, 1>(char * p, UnsignedOfSize<1> u) // large values are >= 10^2*N // where x contains quotient and remainder after division by 10^N //===----------------------------------------------------------===// - template -static inline char * large(char * p, QuotientAndRemainder x) +ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder x) { QuotientAndRemainder y = split(x.quotient); p = head(p, UnsignedOfSize(y.quotient)); @@ -259,9 +258,8 @@ static inline char * large(char * p, QuotientAndRemainder x) // handle values of "u" that might be >= 10^2*N // where N is the size of "u" in bytes //===----------------------------------------------------------===// - template -static inline char * uitoa(char * p, UInt u) +ALWAYS_INLINE inline char * uitoa(char * p, UInt u) { if (u < pow10>(N)) return head(p, UnsignedOfSize(u)); @@ -272,7 +270,7 @@ static inline char * uitoa(char * p, UInt u) // selected when "u" is one byte template <> -inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) +ALWAYS_INLINE inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) { if (u < 10) return outDigit(p, u); @@ -292,14 +290,14 @@ inline char * uitoa, 1>(char * p, UnsignedOfSize<1> u) // itoa: handle unsigned integral operands (selected by SFINAE) template && std::is_integral_v> * = nullptr> -static inline char * itoa(U u, char * p) +ALWAYS_INLINE inline char * itoa(U u, char * p) { return convert::uitoa(p, u); } // itoa: handle signed integral operands (selected by SFINAE) template && std::is_integral_v> * = nullptr> -static inline char * itoa(I i, char * p) +ALWAYS_INLINE inline char * itoa(I i, char * p) { // Need "mask" to be filled with a copy of the sign bit. 
// If "i" is a negative value, then the result of "operator >>" @@ -335,63 +333,128 @@ static inline char * itoa(I i, char * p) } -template -static NO_INLINE char * writeUIntText(T _x, char * p) +const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull; +constexpr int max_multiple_of_hundred_blocks = 9; +static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0); + +ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p) { - static_assert(std::is_same_v || std::is_same_v); - using T_ = std::conditional_t< - std::is_same_v, - unsigned __int128, + /// If we the highest 8 byte item is empty, we can print only the lowest item as i64 + if (_x.items[UInt128::_impl::little(1)] == 0) + return convert::itoa(_x.items[UInt128::_impl::little(0)], p); + + /// Doing operations using __int128 is faster, as we already rely on this feature + using T = unsigned __int128; + T x = (T(_x.items[UInt128::_impl::little(1)]) << 64) + T(_x.items[UInt128::_impl::little(0)]); + + /// We are going to accumulate blocks of 2 digits to print until the number is small enough to be printed as u64 + /// To do this we could do: x / 100, x % 100 + /// But this is too many iterations with long integers, so instead we can divide by a much longer integer + /// max_multiple_of_hundred_that_fits_in_64_bits and then get the blocks out of this (as u64) + static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits; + static const T largest_uint64 = std::numeric_limits::max(); + uint8_t two_values[20] = {0}; // 39 Max characters / 2 + + int current_block = 0; + while (x > largest_uint64) + { + uint64_t remainder = uint64_t(x % large_divisor); + x /= large_divisor; + + int pos = current_block; + while (remainder) + { + two_values[pos] = uint8_t(remainder % 100); + pos++; + remainder /= 100; + } + current_block += max_multiple_of_hundred_blocks; + } + + char * highest_part_print = convert::itoa(uint64_t(x), p); + for (int i = 0; i < current_block; i++) + { + outTwoDigits(highest_part_print, two_values[current_block - 1 - i]); + highest_part_print += 2; + } + + return highest_part_print; +} + +ALWAYS_INLINE inline char * writeUIntText(UInt256 _x, char * p) +{ + /// If possible, treat it as a smaller integer as they are much faster to print + if (_x.items[UInt256::_impl::little(3)] == 0 && _x.items[UInt256::_impl::little(2)] == 0) + return writeUIntText(UInt128{_x.items[UInt256::_impl::little(0)], _x.items[UInt256::_impl::little(1)]}, p); + + /// If available (x86) we transform from our custom class to _BitInt(256) which has better support in the compiler + /// and produces better code + using T = #if defined(__x86_64__) # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wbit-int-extension" unsigned _BitInt(256) # pragma clang diagnostic pop #else - T + UInt256 #endif - >; + ; - T_ x; - T_ hundred(100ULL); - if constexpr (std::is_same_v) - { - x = (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); - } - else - { #if defined(__x86_64__) - x = (T_(_x.items[T::_impl::little(3)]) << 192) + (T_(_x.items[T::_impl::little(2)]) << 128) - + (T_(_x.items[T::_impl::little(1)]) << 64) + T_(_x.items[T::_impl::little(0)]); + T x = (T(_x.items[UInt256::_impl::little(3)]) << 192) + (T(_x.items[UInt256::_impl::little(2)]) << 128) + + (T(_x.items[UInt256::_impl::little(1)]) << 64) + T(_x.items[UInt256::_impl::little(0)]); #else - x = _x; + T x = _x; #endif + + /// Similar to writeUIntText(UInt128) only that in this case we will stop as soon 
as we reach the largest u128 + /// and switch to that function + uint8_t two_values[39] = {0}; // 78 Max characters / 2 + int current_pos = 0; + + static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits; + static const T largest_uint128 = T(std::numeric_limits::max()) << 64 | T(std::numeric_limits::max()); + + while (x > largest_uint128) + { + uint64_t remainder = uint64_t(x % large_divisor); + x /= large_divisor; + + int pos = current_pos; + while (remainder) + { + two_values[pos] = uint8_t(remainder % 100); + pos++; + remainder /= 100; + } + current_pos += max_multiple_of_hundred_blocks; } - int len = digits10(x); - auto * pp = p + len; - while (x >= hundred) +#if defined(__x86_64__) + UInt128 pending{uint64_t(x), uint64_t(x >> 64)}; +#else + UInt128 pending{x.items[UInt256::_impl::little(0)], x.items[UInt256::_impl::little(1)]}; +#endif + + char * highest_part_print = writeUIntText(pending, p); + for (int i = 0; i < current_pos; i++) { - const auto i = x % hundred; - x /= hundred; - pp -= 2; - outTwoDigits(pp, i); + outTwoDigits(highest_part_print, two_values[current_pos - 1 - i]); + highest_part_print += 2; } - if (x < 10) - *p = '0' + x; - else - outTwoDigits(p, x); - return p + len; + + return highest_part_print; } -static ALWAYS_INLINE inline char * writeLeadingMinus(char * pos) + +ALWAYS_INLINE inline char * writeLeadingMinus(char * pos) { *pos = '-'; return pos + 1; } template -static ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) +ALWAYS_INLINE inline char * writeSIntText(T x, char * pos) { static_assert(std::is_same_v || std::is_same_v); From 2bc4d27ac6f42af79120dac72b25db6e36ee4b42 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 7 Mar 2024 19:24:39 +0000 Subject: [PATCH 170/985] Bye bye --- docker/packager/README.md | 1 - docs/en/operations/backup.md | 6 +- .../operations/utilities/clickhouse-copier.md | 187 -- docs/en/operations/utilities/index.md | 2 - .../sql-reference/statements/alter/column.md | 2 +- docs/ru/getting-started/tutorial.md | 4 - docs/ru/operations/backup.md | 6 - .../operations/utilities/clickhouse-copier.md | 183 -- docs/ru/operations/utilities/index.md | 1 - .../sql-reference/statements/alter/column.md | 4 +- docs/zh/getting-started/tutorial.md | 2 - docs/zh/operations/backup.md | 6 - .../operations/utilities/clickhouse-copier.md | 172 -- docs/zh/operations/utilities/index.md | 1 - docs/zh/sql-reference/statements/alter.md | 2 +- packages/clickhouse-server.yaml | 2 - programs/CMakeLists.txt | 2 - programs/copier/Aliases.h | 15 - programs/copier/CMakeLists.txt | 28 - programs/copier/ClusterCopier.cpp | 2076 ----------------- programs/copier/ClusterCopier.h | 240 -- programs/copier/ClusterCopierApp.cpp | 252 -- programs/copier/ClusterCopierApp.h | 99 - programs/copier/ClusterPartition.h | 22 - programs/copier/Internals.cpp | 280 --- programs/copier/Internals.h | 198 -- programs/copier/ShardPartition.cpp | 70 - programs/copier/ShardPartition.h | 54 - programs/copier/ShardPartitionPiece.cpp | 64 - programs/copier/ShardPartitionPiece.h | 43 - programs/copier/StatusAccumulator.cpp | 48 - programs/copier/StatusAccumulator.h | 27 - programs/copier/TaskCluster.cpp | 74 - programs/copier/TaskCluster.h | 51 - programs/copier/TaskShard.cpp | 37 - programs/copier/TaskShard.h | 56 - programs/copier/TaskTable.cpp | 222 -- programs/copier/TaskTable.h | 173 -- programs/copier/ZooKeeperStaff.h | 221 -- programs/copier/clickhouse-copier.cpp | 1 - .../testdata/configs/xml/config.xml | 2 +- .../testdata/configs/yaml/config.yaml | 2 +- 
.../testdata/configs/yandex_xml/config.xml | 2 +- programs/install/Install.cpp | 1 - programs/main.cpp | 2 - programs/server/config.xml | 2 +- programs/server/config.yaml.example | 2 +- src/Storages/StorageDistributed.h | 1 - tests/integration/README.md | 2 +- .../test_cluster_copier/__init__.py | 0 .../configs/conf.d/clusters.xml | 73 - .../configs/conf.d/clusters_trivial.xml | 20 - .../configs/conf.d/ddl.xml | 5 - .../configs/conf.d/query_log.xml | 14 - .../configs/config-copier.xml | 11 - .../test_cluster_copier/configs/users.xml | 34 - .../configs_three_nodes/conf.d/clusters.xml | 27 - .../configs_three_nodes/conf.d/ddl.xml | 5 - .../configs_three_nodes/config-copier.xml | 27 - .../configs_three_nodes/users.xml | 32 - .../configs_two_nodes/conf.d/clusters.xml | 22 - .../configs_two_nodes/conf.d/ddl.xml | 5 - .../conf.d/storage_configuration.xml | 34 - .../configs_two_nodes/config-copier.xml | 19 - .../configs_two_nodes/users.xml | 32 - .../test_cluster_copier/task0_description.xml | 95 - .../task_drop_target_partition.xml | 41 - .../task_month_to_week_description.xml | 99 - .../test_cluster_copier/task_no_arg.xml | 39 - .../test_cluster_copier/task_no_index.xml | 109 - .../task_non_partitioned_table.xml | 39 - .../test_cluster_copier/task_self_copy.xml | 63 - .../test_cluster_copier/task_skip_index.xml | 39 - .../test_cluster_copier/task_taxi_data.xml | 42 - .../task_test_block_size.xml | 101 - .../test_cluster_copier/task_trivial.xml | 63 - .../task_trivial_without_arguments.xml | 63 - .../test_cluster_copier/task_ttl_columns.xml | 39 - .../task_ttl_move_to_volume.xml | 39 - .../task_with_different_schema.xml | 39 - tests/integration/test_cluster_copier/test.py | 653 ------ .../test_cluster_copier/test_three_nodes.py | 286 --- .../test_cluster_copier/test_trivial.py | 227 -- .../test_cluster_copier/test_two_nodes.py | 597 ----- .../test_config_xml_full/configs/config.xml | 2 +- 85 files changed, 12 insertions(+), 7973 deletions(-) delete mode 100644 docs/en/operations/utilities/clickhouse-copier.md delete mode 100644 docs/ru/operations/utilities/clickhouse-copier.md delete mode 100644 docs/zh/operations/utilities/clickhouse-copier.md delete mode 100644 programs/copier/Aliases.h delete mode 100644 programs/copier/CMakeLists.txt delete mode 100644 programs/copier/ClusterCopier.cpp delete mode 100644 programs/copier/ClusterCopier.h delete mode 100644 programs/copier/ClusterCopierApp.cpp delete mode 100644 programs/copier/ClusterCopierApp.h delete mode 100644 programs/copier/ClusterPartition.h delete mode 100644 programs/copier/Internals.cpp delete mode 100644 programs/copier/Internals.h delete mode 100644 programs/copier/ShardPartition.cpp delete mode 100644 programs/copier/ShardPartition.h delete mode 100644 programs/copier/ShardPartitionPiece.cpp delete mode 100644 programs/copier/ShardPartitionPiece.h delete mode 100644 programs/copier/StatusAccumulator.cpp delete mode 100644 programs/copier/StatusAccumulator.h delete mode 100644 programs/copier/TaskCluster.cpp delete mode 100644 programs/copier/TaskCluster.h delete mode 100644 programs/copier/TaskShard.cpp delete mode 100644 programs/copier/TaskShard.h delete mode 100644 programs/copier/TaskTable.cpp delete mode 100644 programs/copier/TaskTable.h delete mode 100644 programs/copier/ZooKeeperStaff.h delete mode 100644 programs/copier/clickhouse-copier.cpp delete mode 100644 tests/integration/test_cluster_copier/__init__.py delete mode 100644 tests/integration/test_cluster_copier/configs/conf.d/clusters.xml delete mode 100644 
tests/integration/test_cluster_copier/configs/conf.d/clusters_trivial.xml delete mode 100644 tests/integration/test_cluster_copier/configs/conf.d/ddl.xml delete mode 100644 tests/integration/test_cluster_copier/configs/conf.d/query_log.xml delete mode 100644 tests/integration/test_cluster_copier/configs/config-copier.xml delete mode 100644 tests/integration/test_cluster_copier/configs/users.xml delete mode 100644 tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml delete mode 100644 tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml delete mode 100644 tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml delete mode 100644 tests/integration/test_cluster_copier/configs_three_nodes/users.xml delete mode 100644 tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml delete mode 100644 tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml delete mode 100644 tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml delete mode 100644 tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml delete mode 100644 tests/integration/test_cluster_copier/configs_two_nodes/users.xml delete mode 100644 tests/integration/test_cluster_copier/task0_description.xml delete mode 100644 tests/integration/test_cluster_copier/task_drop_target_partition.xml delete mode 100644 tests/integration/test_cluster_copier/task_month_to_week_description.xml delete mode 100644 tests/integration/test_cluster_copier/task_no_arg.xml delete mode 100644 tests/integration/test_cluster_copier/task_no_index.xml delete mode 100644 tests/integration/test_cluster_copier/task_non_partitioned_table.xml delete mode 100644 tests/integration/test_cluster_copier/task_self_copy.xml delete mode 100644 tests/integration/test_cluster_copier/task_skip_index.xml delete mode 100644 tests/integration/test_cluster_copier/task_taxi_data.xml delete mode 100644 tests/integration/test_cluster_copier/task_test_block_size.xml delete mode 100644 tests/integration/test_cluster_copier/task_trivial.xml delete mode 100644 tests/integration/test_cluster_copier/task_trivial_without_arguments.xml delete mode 100644 tests/integration/test_cluster_copier/task_ttl_columns.xml delete mode 100644 tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml delete mode 100644 tests/integration/test_cluster_copier/task_with_different_schema.xml delete mode 100644 tests/integration/test_cluster_copier/test.py delete mode 100644 tests/integration/test_cluster_copier/test_three_nodes.py delete mode 100644 tests/integration/test_cluster_copier/test_trivial.py delete mode 100644 tests/integration/test_cluster_copier/test_two_nodes.py diff --git a/docker/packager/README.md b/docker/packager/README.md index e0b7f38ea58..3604e8585a4 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-clang -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-client -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-compressor -> clickhouse -lrwxrwxrwx 1 root root 10 clickhouse-copier -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-extract-from-config -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-format -> clickhouse lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 2d9bf2a2ee8..8639af468c2 100644 --- 
a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -170,7 +170,7 @@ RESTORE TABLE test.table PARTITIONS '2', '3' ### Backups as tar archives -Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. +Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported. Write a backup as a tar: ``` @@ -444,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective. -### clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters. - For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well. ### Manipulations with Parts {#manipulations-with-parts} diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md deleted file mode 100644 index 0d329487504..00000000000 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,187 +0,0 @@ ---- -slug: /en/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier - -Copies data from the tables in one cluster to tables in another (or the same) cluster. - -:::note -To get a consistent copy, the data in the source tables and partitions should not change during the entire process. -::: - -You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes. - -After starting, `clickhouse-copier`: - -- Connects to ClickHouse Keeper and receives: - - - Copying jobs. - - The state of the copying jobs. - -- It performs the jobs. - - Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. - -`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly. - -To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located. - -## Running Clickhouse-copier {#running-clickhouse-copier} - -The utility should be run manually: - -``` bash -$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Parameters: - -- `daemon` — Starts `clickhouse-copier` in daemon mode. -- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. -- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. 
-- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false. -- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. - -## Format of keeper.xml {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## Configuration of Copying Tasks {#configuration-of-copying-tasks} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index 8959073d00e..912a5b9ccb1 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -2,13 +2,11 @@ slug: /en/operations/utilities/ sidebar_position: 56 sidebar_label: List of tools and utilities -pagination_next: 'en/operations/utilities/clickhouse-copier' --- # List of tools and utilities - [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 0989c151d18..a23710b12bd 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -335,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`). 
-If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query. +If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running. diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index 34064b6cf2f..8c827137e6d 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -:::danger Внимание! -Этот подход не годится для сегментирования больших таблиц. Есть инструмент [clickhouse-copier](../operations/utilities/clickhouse-copier.md), специально предназначенный для перераспределения любых больших таблиц. -::: - Как и следовало ожидать, вычислительно сложные запросы работают втрое быстрее, если они выполняются на трёх серверах, а не на одном. В данном случае мы использовали кластер из трёх сегментов с одной репликой для каждого. diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 9ff13bbc8a6..50ee6b45e09 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -24,12 +24,6 @@ sidebar_label: "Резервное копирование данных" Некоторые локальные файловые системы позволяют делать снимки (например, [ZFS](https://en.wikipedia.org/wiki/ZFS)), но они могут быть не лучшим выбором для обслуживания живых запросов. Возможным решением является создание дополнительных реплик с такой файловой системой и исключение их из [Distributed](../engines/table-engines/special/distributed.md) таблиц, используемых для запросов `SELECT`. Снимки на таких репликах будут недоступны для запросов, изменяющих данные. В качестве бонуса, эти реплики могут иметь особые конфигурации оборудования с большим количеством дисков, подключенных к серверу, что будет экономически эффективным. -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) — это универсальный инструмент, который изначально был создан для перешардирования таблиц с петабайтами данных. Его также можно использовать для резервного копирования и восстановления, поскольку он надёжно копирует данные между таблицами и кластерами ClickHouse. - -Для небольших объёмов данных можно применять `INSERT INTO ... SELECT ...` в удалённые таблицы. - ## Манипуляции с партициями {#manipuliatsii-s-partitsiiami} ClickHouse позволяет использовать запрос `ALTER TABLE ... FREEZE PARTITION ...` для создания локальной копии партиций таблицы. 
Это реализуется с помощью жестких ссылок (hardlinks) на каталог `/var/lib/clickhouse/shadow/`, поэтому такая копия обычно не занимает дополнительное место на диске для старых данных. Созданные копии файлов не обрабатываются сервером ClickHouse, поэтому вы можете просто оставить их там: у вас будет простая резервная копия, которая не требует дополнительной внешней системы, однако при аппаратных проблемах вы можете утратить и актуальные данные и сохраненную копию. По этой причине, лучше удаленно скопировать их в другое место, а затем удалить локальную копию. Распределенные файловые системы и хранилища объектов по-прежнему являются хорошими вариантами для этого, однако можно использовать и обычные присоединенные файловые серверы с достаточно большой ёмкостью (в этом случае передача будет происходить через сетевую файловую систему или, возможно, [rsync](https://en.wikipedia.org/wiki/Rsync)). diff --git a/docs/ru/operations/utilities/clickhouse-copier.md b/docs/ru/operations/utilities/clickhouse-copier.md deleted file mode 100644 index da86ef2d35d..00000000000 --- a/docs/ru/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,183 +0,0 @@ ---- -slug: /ru/operations/utilities/clickhouse-copier -sidebar_position: 59 -sidebar_label: clickhouse-copier ---- - -# clickhouse-copier {#clickhouse-copier} - -Копирует данные из таблиц одного кластера в таблицы другого (или этого же) кластера. - -Можно запустить несколько `clickhouse-copier` для разных серверах для выполнения одного и того же задания. Для синхронизации между процессами используется ZooKeeper. - -После запуска, `clickhouse-copier`: - -- Соединяется с ZooKeeper и получает: - - - Задания на копирование. - - Состояние заданий на копирование. - -- Выполняет задания. - - Каждый запущенный процесс выбирает "ближайший" шард исходного кластера и копирует данные в кластер назначения, при необходимости перешардируя их. - -`clickhouse-copier` отслеживает изменения в ZooKeeper и применяет их «на лету». - -Для снижения сетевого трафика рекомендуем запускать `clickhouse-copier` на том же сервере, где находятся исходные данные. - -## Запуск Clickhouse-copier {#zapusk-clickhouse-copier} - -Утилиту следует запускать вручную следующим образом: - -``` bash -$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -Параметры запуска: - -- `daemon` - запускает `clickhouse-copier` в режиме демона. -- `config` - путь к файлу `zookeeper.xml` с параметрами соединения с ZooKeeper. -- `task-path` - путь к ноде ZooKeeper. Нода используется для синхронизации между процессами `clickhouse-copier` и для хранения заданий. Задания хранятся в `$task-path/description`. -- `task-file` - необязательный путь к файлу с описанием конфигурация заданий для загрузки в ZooKeeper. -- `task-upload-force` - Загрузить `task-file` в ZooKeeper даже если уже было загружено. -- `base-dir` - путь к логам и вспомогательным файлам. При запуске `clickhouse-copier` создает в `$base-dir` подкаталоги `clickhouse-copier_YYYYMMHHSS_`. Если параметр не указан, то каталоги будут создаваться в каталоге, где `clickhouse-copier` был запущен. - -## Формат Zookeeper.xml {#format-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## Конфигурация заданий на копирование {#konfiguratsiia-zadanii-na-kopirovanie} - -``` xml - - - - - - - false - - 127.0.0.1 - 9000 - - - - ... - - - - ... 
- - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... - - -``` - -`clickhouse-copier` отслеживает изменения `/task/path/description` и применяет их «на лету». Если вы поменяете, например, значение `max_workers`, то количество процессов, выполняющих задания, также изменится. diff --git a/docs/ru/operations/utilities/index.md b/docs/ru/operations/utilities/index.md index 9eb90a3037c..e4b01a0276d 100644 --- a/docs/ru/operations/utilities/index.md +++ b/docs/ru/operations/utilities/index.md @@ -7,7 +7,6 @@ sidebar_position: 56 # Утилиты ClickHouse {#utility-clickhouse} - [clickhouse-local](clickhouse-local.md) - позволяет выполнять SQL-запросы над данными без остановки сервера ClickHouse, подобно утилите `awk`. -- [clickhouse-copier](clickhouse-copier.md) - копирует (и перешардирует) данные с одного кластера на другой. - [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — устанавливает соединение с сервером ClickHouse и запускает циклическое выполнение указанных запросов. - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — позволяет форматировать входящие запросы. - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — обфусцирует данные. diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index 385a9835eca..2ea045f4ae3 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name Переименовывает столбец `name` в `new_name`. Если указано выражение `IF EXISTS`, то запрос не будет возвращать ошибку при условии, что столбец `name` не существует. Поскольку переименование не затрагивает физические данные колонки, запрос выполняется практически мгновенно. -**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`. +**ЗАМЕЧЕНИЕ**: Столбцы, являющиеся частью основного ключа или ключа сортировки (заданные с помощью `ORDER BY` или `PRIMARY KEY`), не могут быть переименованы. Попытка переименовать эти слобцы приведет к `SQL Error [524]`. Пример: @@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp; Отсутствует возможность удалять столбцы, входящие в первичный ключ или ключ для сэмплирования (в общем, входящие в выражение `ENGINE`). Изменение типа у столбцов, входящих в первичный ключ возможно только в том случае, если это изменение не приводит к изменению данных (например, разрешено добавление значения в Enum или изменение типа с `DateTime` на `UInt32`). -Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. 
В качестве альтернативы для запроса `INSERT SELECT`, можно использовать инструмент [clickhouse-copier](../../../sql-reference/statements/alter/index.md). +Если возможностей запроса `ALTER` не хватает для нужного изменения таблицы, вы можете создать новую таблицу, скопировать туда данные с помощью запроса [INSERT SELECT](../insert-into.md#inserting-the-results-of-select), затем поменять таблицы местами с помощью запроса [RENAME](../rename.md#rename-table), и удалить старую таблицу. Запрос `ALTER` блокирует все чтения и записи для таблицы. То есть если на момент запроса `ALTER` выполнялся долгий `SELECT`, то запрос `ALTER` сначала дождётся его выполнения. И в это время все новые запросы к той же таблице будут ждать, пока завершится этот `ALTER`. diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 989cf5f57d8..d0c9bda83ef 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand()); INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; ``` -!!! warning "注意:" - 这种方法不适合大型表的分片。 有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。 正如您所期望的那样,如果计算量大的查询使用3台服务器而不是一个,则运行速度快N倍。 diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 6d491f9c2f7..48e852b4228 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD" 某些本地文件系统提供快照功能(例如, [ZFS](https://en.wikipedia.org/wiki/ZFS)),但它们可能不是提供实时查询的最佳选择。 一个可能的解决方案是使用这种文件系统创建额外的副本,并将它们与用于`SELECT` 查询的 [分布式](../engines/table-engines/special/distributed.md) 表分离。 任何修改数据的查询都无法访问此类副本上的快照。 作为回报,这些副本可能具有特殊的硬件配置,每个服务器附加更多的磁盘,这将是经济高效的。 -## clickhouse-copier {#clickhouse-copier} - -[clickhouse-copier](utilities/clickhouse-copier.md) 是一个多功能工具,最初创建它是为了用于重新切分pb大小的表。 因为它能够在ClickHouse表和集群之间可靠地复制数据,所以它也可用于备份和还原数据。 - -对于较小的数据量,一个简单的 `INSERT INTO ... SELECT ...` 到远程表也可以工作。 - ## part操作 {#manipulations-with-parts} ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表分区的本地副本。 这是利用硬链接(hardlink)到 `/var/lib/clickhouse/shadow/` 文件夹中实现的,所以它通常不会因为旧数据而占用额外的磁盘空间。 创建的文件副本不由ClickHouse服务器处理,所以你可以把它们留在那里:你将有一个简单的备份,不需要任何额外的外部系统,但它仍然容易出现硬件问题。 出于这个原因,最好将它们远程复制到另一个位置,然后删除本地副本。 分布式文件系统和对象存储仍然是一个不错的选择,但是具有足够大容量的正常附加文件服务器也可以工作(在这种情况下,传输将通过网络文件系统或者也许是 [rsync](https://en.wikipedia.org/wiki/Rsync) 来进行). 
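A minimal sketch of the `ALTER TABLE ... FREEZE` backup flow described in the backup documentation above. The table `test.hits`, the partition `201803`, the snapshot name and the remote host `backup-host` are illustrative assumptions; paths assume the default ClickHouse data directory.

``` bash
# Create a local snapshot of one partition; hardlinks mean no extra space is used for old parts.
clickhouse-client --query "ALTER TABLE test.hits FREEZE PARTITION 201803 WITH NAME 'hits_201803_backup'"

# The frozen parts appear under the server's shadow directory.
SNAPSHOT_DIR=/var/lib/clickhouse/shadow/hits_201803_backup

# Ship the snapshot to remote storage, then remove the local copy,
# because a copy that stays on the same disks does not survive hardware failure.
rsync -a "$SNAPSHOT_DIR/" backup-host:/backups/test.hits/201803/
rm -rf "$SNAPSHOT_DIR"
```

Restoring is essentially the reverse: place the saved part directories into the target table's `detached/` directory and run `ALTER TABLE ... ATTACH PARTITION`.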
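The `INSERT SELECT` + `RENAME` workaround mentioned in the ALTER documentation above can be sketched as follows; the table `test.hits`, its columns and the new sorting key are hypothetical stand-ins for whatever change `ALTER` cannot express directly.

``` bash
# Build a new table with the desired structure, copy the data, swap names, drop the old table.
clickhouse-client --multiquery --query "
    CREATE TABLE test.hits_new (CounterID UInt32, EventDate Date, UserID UInt64)
        ENGINE = MergeTree ORDER BY (UserID, EventDate);

    -- Copy the data over; for large tables this is often done partition by partition.
    INSERT INTO test.hits_new SELECT CounterID, EventDate, UserID FROM test.hits;

    -- Swap the tables, then drop the old one.
    RENAME TABLE test.hits TO test.hits_old, test.hits_new TO test.hits;
    DROP TABLE test.hits_old;
"
```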
diff --git a/docs/zh/operations/utilities/clickhouse-copier.md b/docs/zh/operations/utilities/clickhouse-copier.md deleted file mode 100644 index b01edd9257c..00000000000 --- a/docs/zh/operations/utilities/clickhouse-copier.md +++ /dev/null @@ -1,172 +0,0 @@ ---- -slug: /zh/operations/utilities/clickhouse-copier ---- -# clickhouse-copier {#clickhouse-copier} - -将数据从一个群集中的表复制到另一个(或相同)群集中的表。 - -您可以运行多个 `clickhouse-copier` 不同服务器上的实例执行相同的作业。 ZooKeeper用于同步进程。 - -开始后, `clickhouse-copier`: - -- 连接到ZooKeeper并且接收: - - - 复制作业。 - - 复制作业的状态。 - -- 它执行的工作。 - - 每个正在运行的进程都会选择源集群的“最接近”分片,然后将数据复制到目标集群,并在必要时重新分片数据。 - -`clickhouse-copier` 跟踪ZooKeeper中的更改,并实时应用它们。 - -为了减少网络流量,我们建议运行 `clickhouse-copier` 在源数据所在的同一服务器上。 - -## 运行Clickhouse-copier {#running-clickhouse-copier} - -该实用程序应手动运行: - -``` bash -clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir -``` - -参数: - -- `daemon` — 在守护进程模式下启动`clickhouse-copier`。 -- `config` — `zookeeper.xml`文件的路径,其中包含用于连接ZooKeeper的参数。 -- `task-path` — ZooKeeper节点的路径。 该节点用于同步`clickhouse-copier`进程和存储任务。 任务存储在`$task-path/description`中。 -- `task-file` — 可选的非必须参数, 指定一个包含任务配置的参数文件, 用于初始上传到ZooKeeper。 -- `task-upload-force` — 即使节点已经存在,也强制上载`task-file`。 -- `base-dir` — 日志和辅助文件的路径。 启动时,`clickhouse-copier`在`$base-dir`中创建`clickhouse-copier_YYYYMMHHSS_`子目录。 如果省略此参数,则会在启动`clickhouse-copier`的目录中创建目录。 - - - -## Zookeeper.xml格式 {#format-of-zookeeper-xml} - -``` xml - - - trace - 100M - 3 - - - - - 127.0.0.1 - 2181 - - - -``` - -## 复制任务的配置 {#configuration-of-copying-tasks} - -``` xml - - - - - - false - - 127.0.0.1 - 9000 - - - ... - - - - ... - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - test - hits - - - destination_cluster - test - hits2 - - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}') - PARTITION BY toMonday(date) - ORDER BY (CounterID, EventDate) - - - - jumpConsistentHash(intHash64(UserID), 2) - - - CounterID != 0 - - - - '2018-02-26' - '2018-03-05' - ... - - - - - - ... - - ... 
- - -``` - -`clickhouse-copier` 跟踪更改 `/task/path/description` 并在飞行中应用它们。 例如,如果你改变的值 `max_workers`,运行任务的进程数也会发生变化。 diff --git a/docs/zh/operations/utilities/index.md b/docs/zh/operations/utilities/index.md index af158baf275..cebe312450c 100644 --- a/docs/zh/operations/utilities/index.md +++ b/docs/zh/operations/utilities/index.md @@ -4,5 +4,4 @@ slug: /zh/operations/utilities/ # 实用工具 {#clickhouse-utility} - [本地查询](clickhouse-local.md) — 在不停止ClickHouse服务的情况下,对数据执行查询操作(类似于 `awk` 命令)。 -- [跨集群复制](clickhouse-copier.md) — 在不同集群间复制数据。 - [性能测试](clickhouse-benchmark.md) — 连接到Clickhouse服务器,执行性能测试。 diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 002d5102fa3..48665ae04ab 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) 不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。 -如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。 +如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。 `ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。 diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 7894129b8e3..dc183ead102 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -50,8 +50,6 @@ contents: dst: /etc/init.d/clickhouse-server - src: clickhouse-server.service dst: /lib/systemd/system/clickhouse-server.service -- src: root/usr/bin/clickhouse-copier - dst: /usr/bin/clickhouse-copier - src: root/usr/bin/clickhouse-server dst: /usr/bin/clickhouse-server # clickhouse-keeper part diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 62bcf068879..d945fdf4a6f 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -122,7 +122,6 @@ add_subdirectory (local) add_subdirectory (benchmark) add_subdirectory (extract-from-config) add_subdirectory (compressor) -add_subdirectory (copier) add_subdirectory (format) add_subdirectory (obfuscator) add_subdirectory (install) @@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server) clickhouse_program_install(clickhouse-client client chc) clickhouse_program_install(clickhouse-local local chl ch) clickhouse_program_install(clickhouse-benchmark benchmark) -clickhouse_program_install(clickhouse-copier copier) clickhouse_program_install(clickhouse-extract-from-config extract-from-config) clickhouse_program_install(clickhouse-compressor compressor) clickhouse_program_install(clickhouse-format format) diff --git a/programs/copier/Aliases.h b/programs/copier/Aliases.h deleted file mode 100644 index 02be3441acd..00000000000 --- a/programs/copier/Aliases.h +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -#include - -#include - -#include - -namespace DB -{ - using ConfigurationPtr = Poco::AutoPtr; - - using DatabaseAndTableName = std::pair; - using 
ListOfDatabasesAndTableNames = std::vector; -} diff --git a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt deleted file mode 100644 index 2c17e70bc5e..00000000000 --- a/programs/copier/CMakeLists.txt +++ /dev/null @@ -1,28 +0,0 @@ -set(CLICKHOUSE_COPIER_SOURCES - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp") - -set (CLICKHOUSE_COPIER_LINK - PRIVATE - clickhouse_common_zookeeper - clickhouse_common_config - clickhouse_parsers - clickhouse_functions - clickhouse_table_functions - clickhouse_aggregate_functions - string_utils - - PUBLIC - daemon -) - -set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) - -clickhouse_program_add(copier) diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp deleted file mode 100644 index 59505d08f5c..00000000000 --- a/programs/copier/ClusterCopier.cpp +++ /dev/null @@ -1,2076 +0,0 @@ -#include "ClusterCopier.h" - -#include "Internals.h" -#include "StatusAccumulator.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace CurrentMetrics -{ - extern const Metric LocalThread; - extern const Metric LocalThreadActive; - extern const Metric LocalThreadScheduled; -} - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int LOGICAL_ERROR; - extern const int UNFINISHED; - extern const int BAD_ARGUMENTS; -} - - -void ClusterCopier::init() -{ - auto zookeeper = getContext()->getZooKeeper(); - - task_description_watch_callback = [this] (const Coordination::WatchResponse & response) - { - if (response.error != Coordination::Error::ZOK) - return; - UInt64 version = ++task_description_version; - LOG_INFO(log, "Task description should be updated, local version {}", version); - }; - - task_description_path = task_zookeeper_path + "/description"; - task_cluster = std::make_unique(task_zookeeper_path, working_database_name); - - reloadTaskDescription(); - - task_cluster->loadTasks(*task_cluster_current_config); - getContext()->setClustersConfig(task_cluster_current_config, false, task_cluster->clusters_prefix); - - /// Set up shards and their priority - task_cluster->random_engine.seed(randomSeed()); - for (auto & task_table : task_cluster->table_tasks) - { - task_table.cluster_pull = getContext()->getCluster(task_table.cluster_pull_name); - task_table.cluster_push = getContext()->getCluster(task_table.cluster_push_name); - task_table.initShards(task_cluster->random_engine); - } - - LOG_INFO(log, "Will process {} table tasks", task_cluster->table_tasks.size()); - - /// Do not initialize tables, will make deferred initialization in process() - - zookeeper->createAncestors(getWorkersPathVersion() + "/"); - zookeeper->createAncestors(getWorkersPath() + "/"); - /// Init status node - zookeeper->createIfNotExists(task_zookeeper_path + "/status", "{}"); -} - -template -decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) -{ - std::exception_ptr exception; - - if (max_tries == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform 
zero retries"); - - for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) - { - try - { - return func(); - } - catch (...) - { - exception = std::current_exception(); - if (try_number < max_tries) - { - tryLogCurrentException(log, "Will retry"); - std::this_thread::sleep_for(retry_delay_ms); - } - } - } - - std::rethrow_exception(exception); -} - - -void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard) -{ - TaskTable & task_table = task_shard->task_table; - - LOG_INFO(log, "Discover partitions of shard {}", task_shard->getDescription()); - - auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); }; - auto existing_partitions_names = retry(get_partitions, 60); - Strings filtered_partitions_names; - Strings missing_partitions; - - /// Check that user specified correct partition names - auto check_partition_format = [] (const DataTypePtr & type, const String & partition_text_quoted) - { - MutableColumnPtr column_dummy = type->createColumn(); - ReadBufferFromString rb(partition_text_quoted); - - try - { - type->getDefaultSerialization()->deserializeTextQuoted(*column_dummy, rb, FormatSettings()); - } - catch (Exception & e) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} has incorrect format. {}", partition_text_quoted, e.displayText()); - } - }; - - if (task_table.has_enabled_partitions) - { - /// Process partition in order specified by - for (const String & partition_name : task_table.enabled_partitions) - { - /// Check that user specified correct partition names - check_partition_format(task_shard->partition_key_column.type, partition_name); - - auto it = existing_partitions_names.find(partition_name); - - /// Do not process partition if it is not in enabled_partitions list - if (it == existing_partitions_names.end()) - { - missing_partitions.emplace_back(partition_name); - continue; - } - - filtered_partitions_names.emplace_back(*it); - } - - for (const String & partition_name : existing_partitions_names) - { - if (!task_table.enabled_partitions_set.contains(partition_name)) - { - LOG_INFO(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id); - } - } - } - else - { - for (const String & partition_name : existing_partitions_names) - filtered_partitions_names.emplace_back(partition_name); - } - - for (const String & partition_name : filtered_partitions_names) - { - const size_t number_of_splits = task_table.number_of_splits; - task_shard->partition_tasks.emplace(partition_name, ShardPartition(*task_shard, partition_name, number_of_splits)); - task_shard->checked_partitions.emplace(partition_name, true); - - auto shard_partition_it = task_shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - bool res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *task_shard, partition_name, piece_number); - shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - - if (!missing_partitions.empty()) - { - WriteBufferFromOwnString ss; - for (const String & missing_partition : missing_partitions) - ss << " " << missing_partition; - - LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str()); - } - - LOG_INFO(log, "Will copy {} 
partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription()); -} - -void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads) -{ - /// Fetch partitions list from a shard - { - ThreadPool thread_pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads ? num_threads : 2 * getNumberOfPhysicalCPUCores()); - - for (const TaskShardPtr & task_shard : task_table.all_shards) - thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() - { - setThreadName("DiscoverPartns"); - discoverShardPartitions(timeouts, task_shard); - }); - - LOG_INFO(log, "Waiting for {} setup jobs", thread_pool.active()); - thread_pool.wait(); - } -} - -void ClusterCopier::uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force) -{ - auto local_task_description_path = task_path + "/description"; - - String task_config_str; - { - ReadBufferFromFile in(task_file); - readStringUntilEOF(task_config_str, in); - } - if (task_config_str.empty()) - return; - - auto zookeeper = getContext()->getZooKeeper(); - - zookeeper->createAncestors(local_task_description_path); - auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - if (code != Coordination::Error::ZOK && force) - zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent); - - LOG_INFO(log, "Task description {} uploaded to {} with result {} ({})", - ((code != Coordination::Error::ZOK && !force) ? "not " : ""), local_task_description_path, code, Coordination::errorMessage(code)); -} - -void ClusterCopier::reloadTaskDescription() -{ - auto zookeeper = getContext()->getZooKeeper(); - task_description_watch_zookeeper = zookeeper; - - Coordination::Stat stat{}; - - /// It will throw exception if such a node doesn't exist. 
- auto task_config_str = zookeeper->get(task_description_path, &stat); - - LOG_INFO(log, "Loading task description"); - task_cluster_current_config = getConfigurationFromXMLString(task_config_str); - - /// Setup settings - task_cluster->reloadSettings(*task_cluster_current_config); - getContext()->setSettings(task_cluster->settings_common); -} - -void ClusterCopier::updateConfigIfNeeded() -{ - UInt64 version_to_update = task_description_version; - bool is_outdated_version = task_description_current_version != version_to_update; - bool is_expired_session = !task_description_watch_zookeeper || task_description_watch_zookeeper->expired(); - - if (!is_outdated_version && !is_expired_session) - return; - - LOG_INFO(log, "Updating task description"); - reloadTaskDescription(); - - task_description_current_version = version_to_update; -} - -void ClusterCopier::process(const ConnectionTimeouts & timeouts) -{ - for (TaskTable & task_table : task_cluster->table_tasks) - { - LOG_INFO(log, "Process table task {} with {} shards, {} of them are local ones", task_table.table_id, task_table.all_shards.size(), task_table.local_shards.size()); - - if (task_table.all_shards.empty()) - continue; - - /// Discover partitions of each shard and total set of partitions - if (!task_table.has_enabled_partitions) - { - /// If there are no specified enabled_partitions, we must discover them manually - discoverTablePartitions(timeouts, task_table); - - /// After partitions of each shard are initialized, initialize cluster partitions - for (const TaskShardPtr & task_shard : task_table.all_shards) - { - for (const auto & partition_elem : task_shard->partition_tasks) - { - const String & partition_name = partition_elem.first; - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - } - } - - for (auto & partition_elem : task_table.cluster_partitions) - { - const String & partition_name = partition_elem.first; - - for (const TaskShardPtr & task_shard : task_table.all_shards) - task_shard->checked_partitions.emplace(partition_name); - - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - else - { - /// If enabled_partitions are specified, assume that each shard has all partitions - /// We will refine partition set of each shard in future - - for (const String & partition_name : task_table.enabled_partitions) - { - task_table.cluster_partitions.emplace(partition_name, ClusterPartition{}); - task_table.ordered_partition_names.emplace_back(partition_name); - } - } - - task_table.watch.restart(); - - /// Retry table processing - bool table_is_done = false; - for (UInt64 num_table_tries = 1; num_table_tries <= max_table_tries; ++num_table_tries) - { - if (tryProcessTable(timeouts, task_table)) - { - table_is_done = true; - break; - } - } - - if (!table_is_done) - { - throw Exception(ErrorCodes::UNFINISHED, "Too many tries to process table {}. Abort remaining execution", - task_table.table_id); - } - } -} - -/// Protected section - - -/* - * Creates task worker node and checks maximum number of workers not to exceed the limit. - * To achieve this we have to check version of workers_version_path node and create current_worker_path - * node atomically. 
- * */ - -zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized) -{ - std::chrono::milliseconds current_sleep_time = retry_delay_ms; - static constexpr std::chrono::milliseconds max_sleep_time(30000); // 30 sec - - if (unprioritized) - std::this_thread::sleep_for(current_sleep_time); - - String workers_version_path = getWorkersPathVersion(); - String workers_path = getWorkersPath(); - String current_worker_path = getCurrentWorkerNodePath(); - - UInt64 num_bad_version_errors = 0; - - while (true) - { - updateConfigIfNeeded(); - - Coordination::Stat stat; - zookeeper->get(workers_version_path, &stat); - auto version = stat.version; - zookeeper->get(workers_path, &stat); - - if (static_cast(stat.numChildren) >= task_cluster->max_workers) - { - LOG_INFO(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description); - - if (unprioritized) - current_sleep_time = std::min(max_sleep_time, current_sleep_time + retry_delay_ms); - - std::this_thread::sleep_for(current_sleep_time); - num_bad_version_errors = 0; - } - else - { - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(workers_version_path, description, version)); - ops.emplace_back(zkutil::makeCreateRequest(current_worker_path, description, zkutil::CreateMode::Ephemeral)); - Coordination::Responses responses; - auto code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK || code == Coordination::Error::ZNODEEXISTS) - return zkutil::EphemeralNodeHolder::existing(current_worker_path, *zookeeper); - - if (code == Coordination::Error::ZBADVERSION) - { - ++num_bad_version_errors; - - /// Try to make fast retries - if (num_bad_version_errors > 3) - { - LOG_INFO(log, "A concurrent worker has just been added, will check free worker slots again"); - std::chrono::milliseconds random_sleep_time(std::uniform_int_distribution(1, 1000)(task_cluster->random_engine)); - std::this_thread::sleep_for(random_sleep_time); - num_bad_version_errors = 0; - } - } - else - throw Coordination::Exception(code); - } - } -} - - -bool ClusterCopier::checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path) -{ - LogicalClock task_start_clock; - - Coordination::Stat stat{}; - if (zookeeper->exists(task_status_path, &stat)) - task_start_clock = LogicalClock(stat.mzxid); - - return clean_state_clock.is_clean() && (!task_start_clock.hasHappened() || clean_state_clock.discovery_zxid <= task_start_clock); -} - - -bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition) -{ - bool answer = true; - for (size_t piece_number = 0; piece_number < task_table.number_of_splits; ++piece_number) - { - bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition); - if (!piece_is_done) - LOG_INFO(log, "Partition {} piece {} is not already done.", partition_name, piece_number); - answer &= piece_is_done; - } - - return answer; -} - - -/* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. 
- * */ -bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition) -{ - LOG_INFO(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number); - - auto zookeeper = getContext()->getZooKeeper(); - - /// Collect all shards that contain partition piece number piece_number. - Strings piece_status_paths; - for (const auto & shard : shards_with_partition) - { - ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second; - ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number]; - piece_status_paths.emplace_back(shard_partition_piece.getShardStatusPath()); - } - - std::vector zxid1, zxid2; - - try - { - std::vector get_futures; - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Check that state is Finished and remember zxid - for (auto & future : get_futures) - { - auto res = future.get(); - - TaskStateWithOwner status = TaskStateWithOwner::fromString(res.data); - if (status.state != TaskState::Finished) - { - LOG_INFO(log, "The task {} is being rewritten by {}. Partition piece will be rechecked", res.data, status.owner); - return false; - } - - zxid1.push_back(res.stat.pzxid); - } - - const String piece_is_dirty_flag_path = task_table.getCertainPartitionPieceIsDirtyPath(partition_name, piece_number); - const String piece_is_dirty_cleaned_path = task_table.getCertainPartitionPieceIsCleanedPath(partition_name, piece_number); - const String piece_task_status_path = task_table.getCertainPartitionPieceTaskStatusPath(partition_name, piece_number); - - CleanStateClock clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_task_status_path); - - - if (!is_clean) - { - LOG_INFO(log, "Partition {} become dirty", partition_name); - return false; - } - - get_futures.clear(); - for (const String & path : piece_status_paths) - get_futures.emplace_back(zookeeper->asyncGet(path)); - - // Remember zxid of states again - for (auto & future : get_futures) - { - auto res = future.get(); - zxid2.push_back(res.stat.pzxid); - } - } - catch (const Coordination::Exception & e) - { - LOG_INFO(log, "A ZooKeeper error occurred while checking partition {} piece number {}. Will recheck the partition. Error: {}", partition_name, toString(piece_number), e.displayText()); - return false; - } - - // If all task is finished and zxid is not changed then partition could not become dirty again - for (UInt64 shard_num = 0; shard_num < piece_status_paths.size(); ++shard_num) - { - if (zxid1[shard_num] != zxid2[shard_num]) - { - LOG_INFO(log, "The task {} is being modified now. 
Partition piece will be rechecked", piece_status_paths[shard_num]); - return false; - } - } - - LOG_INFO(log, "Partition {} piece number {} is copied successfully", partition_name, toString(piece_number)); - return true; -} - - -TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name) -{ - bool inject_fault = false; - if (move_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < move_fault_probability; - } - - LOG_INFO(log, "Try to move {} to destination table", partition_name); - - auto zookeeper = getContext()->getZooKeeper(); - - const auto current_partition_attach_is_active = task_table.getPartitionAttachIsActivePath(partition_name); - const auto current_partition_attach_is_done = task_table.getPartitionAttachIsDonePath(partition_name); - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_partition_attach_is_active); - zkutil::EphemeralNodeHolderPtr partition_attach_node_holder; - try - { - partition_attach_node_holder = zkutil::EphemeralNodeHolder::create(current_partition_attach_is_active, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already moving pieces {}", current_partition_attach_is_active); - return TaskStatus::Active; - } - - throw; - } - - - /// Exit if task has been already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_partition_attach_is_done, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner); - - /// Remove is_done marker. - zookeeper->remove(current_partition_attach_is_done); - } - } - - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - zookeeper->create(current_partition_attach_is_done, start_state, zkutil::CreateMode::Persistent); - } - - - /// Try to drop destination partition in original table - if (task_table.allow_to_drop_target_partitions) - { - DatabaseAndTableName original_table = task_table.table_push; - - WriteBufferFromOwnString ss; - ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? 
" DROP PARTITION ID " : " DROP PARTITION ") << partition_name; - - UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD); - if (num_shards_drop_partition != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - - LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}", - partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount()); - } - - /// Move partition to original destination table. - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - LOG_INFO(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number)); - - ASTPtr query_alter_ast; - String query_alter_ast_string; - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, - original_table.second + "_piece_" + - toString(current_piece_number)); - - Settings settings_push = task_cluster->settings_push; - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_NODE; - - if (settings_push.alter_sync == 1) - execution_mode = ClusterExecutionMode::ON_EACH_SHARD; - - query_alter_ast_string += " ALTER TABLE " + getQuotedTable(original_table) + - ((partition_name == "'all'") ? " ATTACH PARTITION ID " : " ATTACH PARTITION ") + partition_name + - " FROM " + getQuotedTable(helping_table); - - LOG_INFO(log, "Executing ALTER query: {}", query_alter_ast_string); - - try - { - /// Try attach partition on each shard - UInt64 num_nodes = executeQueryOnCluster( - task_table.cluster_push, - query_alter_ast_string, - task_cluster->settings_push, - execution_mode); - - if (settings_push.alter_sync == 1) - { - LOG_INFO( - log, - "Destination tables {} have been executed alter query successfully on {} shards of {}", - getQuotedTable(task_table.table_push), - num_nodes, - task_table.cluster_push->getShardCount()); - - if (num_nodes != task_table.cluster_push->getShardCount()) - return TaskStatus::Error; - } - else - { - LOG_INFO(log, "Number of nodes that executed ALTER query successfully : {}", toString(num_nodes)); - } - } - catch (...) - { - LOG_INFO(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number)); - LOG_WARNING(log, "In case of non-replicated tables it can cause duplicates."); - throw; - } - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - - /// Create node to signal that we finished moving - /// Also increment a counter of processed partitions - { - const auto state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - const auto task_status = task_zookeeper_path + "/status"; - - /// Try until success - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_status, &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Increment status for table. 
- (*statuses)[task_table.name_in_config].processed_partitions_count += 1; - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - - Coordination::Requests ops; - ops.emplace_back(zkutil::makeSetRequest(current_partition_attach_is_done, state_finished, 0)); - ops.emplace_back(zkutil::makeSetRequest(task_status, statuses_to_commit, stat.version)); - - Coordination::Responses responses; - Coordination::Error code = zookeeper->tryMulti(ops, responses); - - if (code == Coordination::Error::ZOK) - break; - } - } - - return TaskStatus::Finished; -} - -/// This is needed to create internal Distributed table -/// Removes column's TTL expression from `CREATE` query -/// Removes MATEREALIZED or ALIAS columns not to copy additional and useless data over the network. -/// Removes data skipping indices. -ASTPtr ClusterCopier::removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns) -{ - const ASTs & column_asts = query_ast->as().columns_list->columns->children; - auto new_columns = std::make_shared(); - - for (const ASTPtr & column_ast : column_asts) - { - const auto & column = column_ast->as(); - - /// Skip this columns - if (!column.default_specifier.empty() && !allow_to_copy_alias_and_materialized_columns) - { - ColumnDefaultKind kind = columnDefaultKindFromString(column.default_specifier); - if (kind == ColumnDefaultKind::Materialized || kind == ColumnDefaultKind::Alias) - continue; - } - - /// Remove TTL on columns definition. - auto new_column_ast = column_ast->clone(); - auto & new_column = new_column_ast->as(); - if (new_column.ttl) - new_column.ttl.reset(); - - new_columns->children.emplace_back(new_column_ast); - } - - ASTPtr new_query_ast = query_ast->clone(); - auto & new_query = new_query_ast->as(); - - auto new_columns_list = std::make_shared(); - new_columns_list->set(new_columns_list->columns, new_columns); - - /// Skip indices and projections are not needed, because distributed table doesn't support it. 
- - new_query.replace(new_query.columns_list, new_columns_list); - - return new_query_ast; -} - -/// Replaces ENGINE and table name in a create query -std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_query_ast, - const DatabaseAndTableName & new_table, - const ASTPtr & new_storage_ast) -{ - const auto & create = create_query_ast->as(); - auto res = std::make_shared(create); - - if (create.storage == nullptr || new_storage_ast == nullptr) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage is not specified"); - - res->setDatabase(new_table.first); - res->setTable(new_table.second); - - res->children.clear(); - res->set(res->columns_list, create.columns_list->clone()); - res->set(res->storage, new_storage_ast->clone()); - /// Just to make it better and don't store additional flag like `is_table_created` somewhere else - res->if_not_exists = true; - - return res; -} - - -bool ClusterCopier::tryDropPartitionPiece( - ShardPartition & task_partition, - const size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock) -{ - if (is_safe_mode) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP PARTITION is prohibited in safe mode"); - - TaskTable & task_table = task_partition.task_shard.task_table; - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const String current_shards_path = partition_piece.getPartitionPieceShardsPath(); - const String current_partition_active_workers_dir = partition_piece.getPartitionPieceActiveWorkersPath(); - const String is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String dirty_cleaner_path = partition_piece.getPartitionPieceCleanerPath(); - const String is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - - zkutil::EphemeralNodeHolder::Ptr cleaner_holder; - try - { - cleaner_holder = zkutil::EphemeralNodeHolder::create(dirty_cleaner_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number)); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - - throw; - } - - Coordination::Stat stat{}; - if (zookeeper->exists(current_partition_active_workers_dir, &stat)) - { - if (stat.numChildren != 0) - { - LOG_INFO(log, "Partition {} contains {} active workers while trying to drop it. 
Going to sleep.", task_partition.name, stat.numChildren); - std::this_thread::sleep_for(retry_delay_ms); - return false; - } - else - { - zookeeper->remove(current_partition_active_workers_dir); - } - } - - { - zkutil::EphemeralNodeHolder::Ptr active_workers_lock; - try - { - active_workers_lock = zkutil::EphemeralNodeHolder::create(current_partition_active_workers_dir, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Partition {} is being filled now by somebody, sleep", task_partition.name); - return false; - } - - throw; - } - - // Lock the dirty flag - zookeeper->set(is_dirty_flag_path, host_id, clean_state_clock.discovery_version.value()); - zookeeper->tryRemove(partition_piece.getPartitionPieceCleanStartPath()); - CleanStateClock my_clock(zookeeper, is_dirty_flag_path, is_dirty_cleaned_path); - - /// Remove all status nodes - { - Strings children; - if (zookeeper->tryGetChildren(current_shards_path, children) == Coordination::Error::ZOK) - for (const auto & child : children) - { - zookeeper->removeRecursive(current_shards_path + "/" + child); - } - } - - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "ALTER TABLE " + getQuotedTable(helping_table); - query += ((task_partition.name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + task_partition.name + ""; - - /// TODO: use this statement after servers will be updated up to 1.1.54310 - // query += " DROP PARTITION ID '" + task_partition.name + "'"; - - ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - /// It is important, DROP PARTITION must be done synchronously - settings_push.alter_sync = 2; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - size_t num_shards = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards); - - /// Update the locking node - if (!my_clock.is_stale()) - { - zookeeper->set(is_dirty_flag_path, host_id, my_clock.discovery_version.value()); - if (my_clock.clean_state_version) - zookeeper->set(is_dirty_cleaned_path, host_id, my_clock.clean_state_version.value()); - else - zookeeper->create(is_dirty_cleaned_path, host_id, zkutil::CreateMode::Persistent); - } - else - { - LOG_INFO(log, "Clean state is altered when dropping the partition, cowardly bailing"); - /// clean state is stale - return false; - } - - LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name); - if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::Error::ZNODEEXISTS) - zookeeper->set(current_shards_path, host_id); - } - - LOG_INFO(log, "Partition {} piece {} is safe for work now.", task_partition.name, toString(current_piece_number)); - return true; -} - -bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Create destination table - TaskStatus task_status = TaskStatus::Error; - - task_status = tryCreateDestinationTable(timeouts, task_table); - /// Exit if success - if (task_status != 
TaskStatus::Finished) - { - LOG_WARNING(log, "Create destination table failed "); - return false; - } - - /// Set all_partitions_count for table in Zookeeper - auto zookeeper = getContext()->getZooKeeper(); - while (true) - { - Coordination::Stat stat; - auto status_json = zookeeper->get(task_zookeeper_path + "/status", &stat); - auto statuses = StatusAccumulator::fromJSON(status_json); - - /// Exit if someone already set the initial value for this table. - if (statuses->find(task_table.name_in_config) != statuses->end()) - break; - (*statuses)[task_table.name_in_config] = StatusAccumulator::TableStatus - { - /*all_partitions_count=*/task_table.ordered_partition_names.size(), - /*processed_partition_count=*/0 - }; - - auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses); - auto error = zookeeper->trySet(task_zookeeper_path + "/status", statuses_to_commit, stat.version); - if (error == Coordination::Error::ZOK) - break; - } - - - /// An heuristic: if previous shard is already done, then check next one without sleeps due to max_workers constraint - bool previous_shard_is_instantly_finished = false; - - /// Process each partition that is present in cluster - for (const String & partition_name : task_table.ordered_partition_names) - { - if (!task_table.cluster_partitions.contains(partition_name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no expected partition {}. It is a bug", partition_name); - - ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; - - Stopwatch watch; - /// We will check all the shards of the table and check if they contain current partition. - TasksShard expected_shards; - UInt64 num_failed_shards = 0; - - ++cluster_partition.total_tries; - - LOG_INFO(log, "Processing partition {} for the whole cluster", partition_name); - - /// Process each source shard having current partition and copy current partition - /// NOTE: shards are sorted by "distance" to current host - bool has_shard_to_process = false; - for (const TaskShardPtr & shard : task_table.all_shards) - { - /// Does shard have a node with current partition? - if (!shard->partition_tasks.contains(partition_name)) - { - /// If not, did we check existence of that partition previously? - if (!shard->checked_partitions.contains(partition_name)) - { - auto check_shard_has_partition = [&] () { return checkShardHasPartition(timeouts, *shard, partition_name); }; - bool has_partition = retry(check_shard_has_partition); - - shard->checked_partitions.emplace(partition_name); - - if (has_partition) - { - const size_t number_of_splits = task_table.number_of_splits; - shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits)); - LOG_INFO(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription()); - /// To save references in the future. 
- auto shard_partition_it = shard->partition_tasks.find(partition_name); - PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces; - - for (size_t piece_number = 0; piece_number < number_of_splits; ++piece_number) - { - auto res = checkPresentPartitionPiecesOnCurrentShard(timeouts, *shard, partition_name, piece_number); - shard_partition_pieces.emplace_back(shard_partition_it->second, piece_number, res); - } - } - else - { - LOG_INFO(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name); - continue; - } - } - else - { - /// We have already checked that partition, but did not discover it - previous_shard_is_instantly_finished = true; - continue; - } - } - - auto it_shard_partition = shard->partition_tasks.find(partition_name); - /// Previously when we discovered that shard does not contain current partition, we skipped it. - /// At this moment partition have to be present. - if (it_shard_partition == shard->partition_tasks.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no such partition in a shard. This is a bug."); - auto & partition = it_shard_partition->second; - - expected_shards.emplace_back(shard); - - /// Do not sleep if there is a sequence of already processed shards to increase startup - bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote; - task_status = TaskStatus::Error; - bool was_error = false; - has_shard_to_process = true; - for (UInt64 try_num = 1; try_num <= max_shard_partition_tries; ++try_num) - { - task_status = tryProcessPartitionTask(timeouts, partition, is_unprioritized_task); - - /// Exit if success - if (task_status == TaskStatus::Finished) - break; - - was_error = true; - - /// Skip if the task is being processed by someone - if (task_status == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - if (task_status == TaskStatus::Error) - ++num_failed_shards; - - previous_shard_is_instantly_finished = !was_error; - } - - cluster_partition.elapsed_time_seconds += watch.elapsedSeconds(); - - /// Check that whole cluster partition is done - /// Firstly check the number of failed partition tasks, then look into ZooKeeper and ensure that each partition is done - bool partition_copying_is_done = num_failed_shards == 0; - try - { - partition_copying_is_done = - !has_shard_to_process - || (partition_copying_is_done && checkAllPiecesInPartitionAreDone(task_table, partition_name, expected_shards)); - } - catch (...) - { - tryLogCurrentException(log); - partition_copying_is_done = false; - } - - - bool partition_moving_is_done = false; - /// Try to move only if all pieces were copied. - if (partition_copying_is_done) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_piece_tries_for_alter; ++try_num) - { - try - { - auto res = tryMoveAllPiecesToDestinationTable(task_table, partition_name); - /// Exit and mark current task is done. - if (res == TaskStatus::Finished) - { - partition_moving_is_done = true; - break; - } - - /// Exit if this task is active. - if (res == TaskStatus::Active) - break; - - /// Repeat on errors. - std::this_thread::sleep_for(retry_delay_ms); - } - catch (...) 
- { - tryLogCurrentException(log, "Some error occurred while moving pieces to destination table for partition " + partition_name); - } - } - } - - if (partition_copying_is_done && partition_moving_is_done) - { - task_table.finished_cluster_partitions.emplace(partition_name); - - task_table.bytes_copied += cluster_partition.bytes_copied; - task_table.rows_copied += cluster_partition.rows_copied; - double elapsed = cluster_partition.elapsed_time_seconds; - - LOG_INFO(log, "It took {} seconds to copy partition {}: {} uncompressed bytes, {} rows and {} source blocks are copied", - elapsed, partition_name, - formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied), - formatReadableQuantity(cluster_partition.rows_copied), - cluster_partition.blocks_copied); - - if (cluster_partition.rows_copied) - { - LOG_INFO(log, "Average partition speed: {} per second.", formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed)); - } - - if (task_table.rows_copied) - { - LOG_INFO(log, "Average table {} speed: {} per second.", task_table.table_id, formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed)); - } - } - } - - UInt64 required_partitions = task_table.cluster_partitions.size(); - UInt64 finished_partitions = task_table.finished_cluster_partitions.size(); - bool table_is_done = finished_partitions >= required_partitions; - - if (!table_is_done) - { - LOG_INFO(log, "Table {} is not processed yet. Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); - } - else - { - /// Delete helping tables in case that whole table is done - dropHelpingTables(task_table); - } - - return table_is_done; -} - -TaskStatus ClusterCopier::tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) -{ - /// Try create original table (if not exists) on each shard - - //TaskTable & task_table = task_shard.task_table; - const TaskShardPtr task_shard = task_table.all_shards.at(0); - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard->current_pull_table_create_query = getCreateTableForPullShard(timeouts, *task_shard); - try - { - auto create_query_push_ast - = rewriteCreateQueryStorage(task_shard->current_pull_table_create_query, task_table.table_push, task_table.engine_push_ast); - auto & create = create_query_push_ast->as(); - create.if_not_exists = true; - InterpreterCreateQuery::prepareOnClusterQuery(create, getContext(), task_table.cluster_push_name); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - catch (...) - { - tryLogCurrentException(log, "Error while creating original table. Maybe we are not first."); - } - - return TaskStatus::Finished; -} - -/// Job for copying partition from particular shard. -TaskStatus ClusterCopier::tryProcessPartitionTask(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, bool is_unprioritized_task) -{ - TaskStatus res; - - try - { - res = iterateThroughAllPiecesInPartition(timeouts, task_partition, is_unprioritized_task); - } - catch (...) 
- { - tryLogCurrentException(log, "An error occurred while processing partition " + task_partition.name); - res = TaskStatus::Error; - } - - /// At the end of each task check if the config is updated - try - { - updateConfigIfNeeded(); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred while updating the config"); - } - - return res; -} - -TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - bool is_unprioritized_task) -{ - const size_t total_number_of_pieces = task_partition.task_shard.task_table.number_of_splits; - - TaskStatus res{TaskStatus::Finished}; - - bool was_failed_pieces = false; - bool was_active_pieces = false; - - for (size_t piece_number = 0; piece_number < total_number_of_pieces; piece_number++) - { - for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) - { - LOG_INFO(log, "Attempt number {} to process partition {} piece number {} on shard number {} with index {}.", - try_num, task_partition.name, piece_number, - task_partition.task_shard.numberInCluster(), - task_partition.task_shard.indexInCluster()); - - res = processPartitionPieceTaskImpl(timeouts, task_partition, piece_number, is_unprioritized_task); - - /// Exit if success - if (res == TaskStatus::Finished) - break; - - /// Skip if the task is being processed by someone - if (res == TaskStatus::Active) - break; - - /// Repeat on errors - std::this_thread::sleep_for(retry_delay_ms); - } - - was_active_pieces |= (res == TaskStatus::Active); - was_failed_pieces |= (res == TaskStatus::Error); - } - - if (was_failed_pieces) - return TaskStatus::Error; - - if (was_active_pieces) - return TaskStatus::Active; - - return TaskStatus::Finished; -} - - -TaskStatus ClusterCopier::processPartitionPieceTaskImpl( - const ConnectionTimeouts & timeouts, ShardPartition & task_partition, - const size_t current_piece_number, bool is_unprioritized_task) -{ - TaskShard & task_shard = task_partition.task_shard; - TaskTable & task_table = task_shard.task_table; - ClusterPartition & cluster_partition = task_table.getClusterPartition(task_partition.name); - ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; - - const size_t number_of_splits = task_table.number_of_splits; - const String primary_key_comma_separated = task_table.primary_key_comma_separated; - - /// We need to update table definitions for each partition, it could be changed after ALTER - createShardInternalTables(timeouts, task_shard, true); - - auto split_table_for_current_piece = task_shard.list_of_split_tables_on_shard[current_piece_number]; - - auto zookeeper = getContext()->getZooKeeper(); - - const String piece_is_dirty_flag_path = partition_piece.getPartitionPieceIsDirtyPath(); - const String piece_is_dirty_cleaned_path = partition_piece.getPartitionPieceIsCleanedPath(); - const String current_task_piece_is_active_path = partition_piece.getActiveWorkerPath(); - const String current_task_piece_status_path = partition_piece.getShardStatusPath(); - - /// Auxiliary functions: - - /// Creates is_dirty node to initialize DROP PARTITION - auto create_is_dirty_node = [&] (const CleanStateClock & clock) - { - if (clock.is_stale()) - LOG_INFO(log, "Clean state clock is stale while setting dirty flag, cowardly bailing"); - else if (!clock.is_clean()) - LOG_INFO(log, "Thank you, Captain Obvious"); - else if (clock.discovery_version) - { - LOG_INFO(log, "Updating clean state clock"); - zookeeper->set(piece_is_dirty_flag_path, host_id, 
clock.discovery_version.value()); - } - else - { - LOG_INFO(log, "Creating clean state clock"); - zookeeper->create(piece_is_dirty_flag_path, host_id, zkutil::CreateMode::Persistent); - } - }; - - /// Returns SELECT query filtering current partition and applying user filter - auto get_select_query = [&] (const DatabaseAndTableName & from_table, const String & fields, bool enable_splitting, String limit = "") - { - String query; - query += "WITH " + task_partition.name + " AS partition_key "; - query += "SELECT " + fields + " FROM " + getQuotedTable(from_table); - - if (enable_splitting && experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - /// TODO: Bad, it is better to rewrite with ASTLiteral(partition_key_field) - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (enable_splitting && !experimental_use_sample_offset) - query += " AND ( cityHash64(" + primary_key_comma_separated + ") %" + toString(number_of_splits) + " = " + toString(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - if (!limit.empty()) - query += " LIMIT " + limit; - - query += " FORMAT Native"; - - ParserQuery p_query(query.data() + query.size()); - - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - }; - - /// Load balancing - auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task); - - LOG_INFO(log, "Processing {}", current_task_piece_status_path); - - const String piece_status_path = partition_piece.getPartitionPieceShardsPath(); - - CleanStateClock clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - - const bool is_clean = checkPartitionPieceIsClean(zookeeper, clean_state_clock, piece_status_path); - - /// Do not start if partition piece is dirty, try to clean it - if (is_clean) - { - LOG_INFO(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number); - zookeeper->createAncestors(current_task_piece_status_path); - } - else - { - LOG_INFO(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number); - - try - { - tryDropPartitionPiece(task_partition, current_piece_number, zookeeper, clean_state_clock); - } - catch (...) 
- { - tryLogCurrentException(log, "An error occurred when clean partition"); - } - - return TaskStatus::Error; - } - - /// Create ephemeral node to mark that we are active and process the partition - zookeeper->createAncestors(current_task_piece_is_active_path); - zkutil::EphemeralNodeHolderPtr partition_task_node_holder; - try - { - partition_task_node_holder = zkutil::EphemeralNodeHolder::create(current_task_piece_is_active_path, *zookeeper, host_id); - } - catch (const Coordination::Exception & e) - { - if (e.code == Coordination::Error::ZNODEEXISTS) - { - LOG_INFO(log, "Someone is already processing {}", current_task_piece_is_active_path); - return TaskStatus::Active; - } - - throw; - } - - /// Exit if task has been already processed; - /// create blocking node to signal cleaning up if it is abandoned - { - String status_data; - if (zookeeper->tryGet(current_task_piece_status_path, status_data)) - { - TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data); - if (status.state == TaskState::Finished) - { - LOG_INFO(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner); - return TaskStatus::Finished; - } - - /// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process. - /// Initialize DROP PARTITION - LOG_INFO(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - - /// Try create table (if not exists) on each shard - /// We have to create this table even in case that partition piece is empty - /// This is significant, because we will have simpler code - { - /// 1) Get columns description from any replica of destination cluster - /// 2) Change ENGINE, database and table name - /// 3) Create helping table on the whole destination cluster - auto & settings_push = task_cluster->settings_push; - - auto connection = task_table.cluster_push->getAnyShardInfo().pool->get(timeouts, settings_push, true); - String create_query = getRemoteCreateTable(task_shard.task_table.table_push, *connection, settings_push); - - ParserCreateQuery parser_create_query; - auto create_query_ast = parseQuery(parser_create_query, create_query, settings_push.max_query_size, settings_push.max_parser_depth); - /// Define helping table database and name for current partition piece - DatabaseAndTableName database_and_table_for_current_piece - { - task_table.table_push.first, - task_table.table_push.second + "_piece_" + toString(current_piece_number) - }; - - - auto new_engine_push_ast = task_table.engine_push_ast; - if (task_table.isReplicatedTable()) - new_engine_push_ast = task_table.rewriteReplicatedCreateQueryToPlain(); - - /// Take columns definition from destination table, new database and table name, and new engine (non replicated variant of MergeTree) - auto create_query_push_ast = rewriteCreateQueryStorage(create_query_ast, database_and_table_for_current_piece, new_engine_push_ast); - String query = queryToString(create_query_push_ast); - - LOG_INFO(log, "Create destination tables. 
Query: {}", query); - UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, task_cluster->settings_push, ClusterExecutionMode::ON_EACH_NODE); - LOG_INFO( - log, - "Destination tables {} have been created on {} shards of {}", - getQuotedTable(task_table.table_push), - shards, - task_table.cluster_push->getShardCount()); - } - - - /// Exit if current piece is absent on this shard. Also mark it as finished, because we will check - /// whether each shard have processed each partitition (and its pieces). - if (partition_piece.is_absent_piece) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent); - if (res == Coordination::Error::ZNODEEXISTS) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number); - if (res == Coordination::Error::ZOK) - LOG_INFO(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number); - return TaskStatus::Finished; - } - - /// Check that destination partition is empty if we are first worker - /// NOTE: this check is incorrect if pull and push tables have different partition key! - String clean_start_status; - if (!zookeeper->tryGet(partition_piece.getPartitionPieceCleanStartPath(), clean_start_status) || clean_start_status != "ok") - { - zookeeper->createIfNotExists(partition_piece.getPartitionPieceCleanStartPath(), ""); - auto checker = zkutil::EphemeralNodeHolder::create(partition_piece.getPartitionPieceCleanStartPath() + "/checker", - *zookeeper, host_id); - // Maybe we are the first worker - - ASTPtr query_select_ast = get_select_query(split_table_for_current_piece, "count()", /* enable_splitting= */ true); - UInt64 count; - { - auto local_context = Context::createCopy(context); - // Use pull (i.e. readonly) settings, but fetch data from destination servers - local_context->setSettings(task_cluster->settings_pull); - local_context->setSetting("skip_unavailable_shards", true); - - InterpreterSelectWithUnionQuery select(query_select_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - count = (block) ? block.safeGetByPosition(0).column->getUInt(0) : 0; - } - - if (count != 0) - { - LOG_INFO(log, "Partition {} piece {} is not empty. In contains {} rows.", task_partition.name, current_piece_number, count); - Coordination::Stat stat_shards{}; - zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards); - - /// NOTE: partition is still fresh if dirt discovery happens before cleaning - if (stat_shards.numChildren == 0) - { - LOG_WARNING(log, "There are no workers for partition {} piece {}, but destination table contains {} rows. 
Partition will be dropped and refilled.", task_partition.name, toString(current_piece_number), count); - - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - zookeeper->set(partition_piece.getPartitionPieceCleanStartPath(), "ok"); - } - /// At this point, we need to sync that the destination table is clean - /// before any actual work - - /// Try start processing, create node about it - { - String start_state = TaskStateWithOwner::getData(TaskState::Started, host_id); - CleanStateClock new_clean_state_clock(zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} is dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->create(current_task_piece_status_path, start_state, zkutil::CreateMode::Persistent); - } - - - /// Do the copying - { - bool inject_fault = false; - if (copy_fault_probability > 0) - { - double value = std::uniform_real_distribution<>(0, 1)(task_table.task_cluster.random_engine); - inject_fault = value < copy_fault_probability; - } - - // Select all fields - ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /* enable_splitting= */ true, inject_fault ? "1" : ""); - - LOG_INFO(log, "Executing SELECT query and pull from {}: {}", task_shard.getDescription(), queryToString(query_select_ast)); - - ASTPtr query_insert_ast; - { - String query; - query += "INSERT INTO " + getQuotedTable(split_table_for_current_piece) + " FORMAT Native "; - - ParserQuery p_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Executing INSERT query: {}", query); - } - - try - { - auto context_select = Context::createCopy(context); - context_select->setSettings(task_cluster->settings_pull); - - auto context_insert = Context::createCopy(context); - context_insert->setSettings(task_cluster->settings_push); - - /// Custom INSERT SELECT implementation - QueryPipeline input; - QueryPipeline output; - { - BlockIO io_insert = InterpreterFactory::instance().get(query_insert_ast, context_insert)->execute(); - - InterpreterSelectWithUnionQuery select(query_select_ast, context_select, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(context_select), - BuildQueryPipelineSettings::fromContext(context_select))); - - output = std::move(io_insert.pipeline); - - /// Add converting actions to make it possible to copy blocks with slightly different schema - const auto & select_block = builder.getHeader(); - const auto & insert_block = output.getHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - select_block.getColumnsWithTypeAndName(), - insert_block.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext())); - - builder.addSimpleTransform([&](const Block & header) - { - return std::make_shared(header, actions); - }); 
- input = QueryPipelineBuilder::getPipeline(std::move(builder)); - } - - /// Fail-fast optimization to abort copying when the current clean state expires - std::future future_is_dirty_checker; - - Stopwatch watch(CLOCK_MONOTONIC_COARSE); - constexpr UInt64 check_period_milliseconds = 500; - - /// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copying data - auto cancel_check = [&] () - { - if (zookeeper->expired()) - throw Exception(ErrorCodes::UNFINISHED, "ZooKeeper session is expired, cancel INSERT SELECT"); - - if (!future_is_dirty_checker.valid()) - future_is_dirty_checker = zookeeper->asyncExists(piece_is_dirty_flag_path); - - /// check_period_milliseconds should less than average insert time of single block - /// Otherwise, the insertion will slow a little bit - if (watch.elapsedMilliseconds() >= check_period_milliseconds) - { - Coordination::ExistsResponse status = future_is_dirty_checker.get(); - - if (status.error != Coordination::Error::ZNONODE) - { - LogicalClock dirt_discovery_epoch (status.stat.mzxid); - if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) - return false; - throw Exception(ErrorCodes::UNFINISHED, "Partition is dirty, cancel INSERT SELECT"); - } - } - - return false; - }; - - /// Update statistics - /// It is quite rough: bytes_copied don't take into account DROP PARTITION. - auto update_stats = [&cluster_partition] (const Block & block) - { - cluster_partition.bytes_copied += block.bytes(); - cluster_partition.rows_copied += block.rows(); - cluster_partition.blocks_copied += 1; - }; - - /// Main work is here - PullingPipelineExecutor pulling_executor(input); - PushingPipelineExecutor pushing_executor(output); - - Block data; - bool is_cancelled = false; - while (pulling_executor.pull(data)) - { - if (cancel_check()) - { - is_cancelled = true; - pushing_executor.cancel(); - pushing_executor.cancel(); - break; - } - pushing_executor.push(data); - update_stats(data); - } - - if (!is_cancelled) - pushing_executor.finish(); - - // Just in case - if (future_is_dirty_checker.valid()) - future_is_dirty_checker.get(); - - if (inject_fault) - throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); - } - catch (...) - { - tryLogCurrentException(log, "An error occurred during copying, partition will be marked as dirty"); - create_is_dirty_node(clean_state_clock); - return TaskStatus::Error; - } - } - - LOG_INFO(log, "Partition {} piece {} copied. 
But not moved to original destination table.", task_partition.name, toString(current_piece_number)); - - /// Finalize the processing, change state of current partition task (and also check is_dirty flag) - { - String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id); - CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path); - if (clean_state_clock != new_clean_state_clock) - { - LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number)); - return TaskStatus::Error; - } - else if (!new_clean_state_clock.is_clean()) - { - LOG_INFO(log, "Partition {} piece {} became dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number)); - create_is_dirty_node(new_clean_state_clock); - return TaskStatus::Error; - } - zookeeper->set(current_task_piece_status_path, state_finished, 0); - } - - return TaskStatus::Finished; -} - -void ClusterCopier::dropAndCreateLocalTable(const ASTPtr & create_ast) -{ - const auto & create = create_ast->as(); - dropLocalTableIfExists({create.getDatabase(), create.getTable()}); - - auto create_context = Context::createCopy(getContext()); - - InterpreterCreateQuery interpreter(create_ast, create_context); - interpreter.execute(); -} - -void ClusterCopier::dropLocalTableIfExists(const DatabaseAndTableName & table_name) const -{ - auto drop_ast = std::make_shared(); - drop_ast->if_exists = true; - drop_ast->setDatabase(table_name.first); - drop_ast->setTable(table_name.second); - - auto drop_context = Context::createCopy(getContext()); - - InterpreterDropQuery interpreter(drop_ast, drop_context); - interpreter.execute(); -} - -void ClusterCopier::dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number) -{ - LOG_INFO(log, "Removing helping tables piece {}", current_piece_number); - - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table - = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String query = "DROP TABLE IF EXISTS " + getQuotedTable(helping_table); - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP TABLE: {}", query); - - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster(cluster_push, query, settings_push, ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP TABLE query was successfully executed on {} nodes.", toString(num_nodes)); -} - -void ClusterCopier::dropHelpingTables(const TaskTable & task_table) -{ - LOG_INFO(log, "Removing helping tables"); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - dropHelpingTablesByPieceNumber(task_table, current_piece_number); - } -} - -void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name) -{ - LOG_INFO(log, "Try drop partition partition from all helping tables."); - for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number) - { - DatabaseAndTableName original_table = task_table.table_push; - DatabaseAndTableName helping_table = DatabaseAndTableName(original_table.first, original_table.second + "_piece_" + toString(current_piece_number)); - - String 
query = "ALTER TABLE " + getQuotedTable(helping_table) + ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") + partition_name; - - const ClusterPtr & cluster_push = task_table.cluster_push; - Settings settings_push = task_cluster->settings_push; - - LOG_INFO(log, "Execute distributed DROP PARTITION: {}", query); - /// We have to drop partition_piece on each replica - UInt64 num_nodes = executeQueryOnCluster( - cluster_push, query, - settings_push, - ClusterExecutionMode::ON_EACH_NODE); - - LOG_INFO(log, "DROP PARTITION query was successfully executed on {} nodes.", toString(num_nodes)); - } - LOG_INFO(log, "All helping tables dropped partition {}", partition_name); -} - -String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings) -{ - auto remote_context = Context::createCopy(context); - remote_context->setSettings(settings); - - String query = "SHOW CREATE TABLE " + getQuotedTable(table); - - QueryPipelineBuilder builder; - builder.init(Pipe(std::make_shared( - std::make_shared(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false, /* async_query_sending= */ false))); - Block block = getBlockWithAllStreamData(std::move(builder)); - return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); -} - - -ASTPtr ClusterCopier::getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(timeouts, task_cluster->settings_pull, true); - String create_query_pull_str = getRemoteCreateTable( - task_shard.task_table.table_pull, - *connection_entry, - task_cluster->settings_pull); - - ParserCreateQuery parser_create_query; - const auto & settings = getContext()->getSettingsRef(); - return parseQuery(parser_create_query, create_query_pull_str, settings.max_query_size, settings.max_parser_depth); -} - - -/// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it. 
-void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, bool create_split) -{ - TaskTable & task_table = task_shard.task_table; - - /// We need to update table definitions for each part, it could be changed after ALTER - task_shard.current_pull_table_create_query = getCreateTableForPullShard(timeouts, task_shard); - - /// Create local Distributed tables: - /// a table fetching data from current shard and a table inserting data to the whole destination cluster - String read_shard_prefix = ".read_shard_" + toString(task_shard.indexInCluster()) + "."; - String split_shard_prefix = ".split."; - task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id); - task_shard.main_table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id); - - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - task_shard.list_of_split_tables_on_shard[piece_number] = - DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id + "_piece_" + toString(piece_number)); - } - - /// Create special cluster with single shard - String shard_read_cluster_name = read_shard_prefix + task_table.cluster_pull_name; - ClusterPtr cluster_pull_current_shard = task_table.cluster_pull->getClusterWithSingleShard(task_shard.indexInCluster()); - getContext()->setCluster(shard_read_cluster_name, cluster_pull_current_shard); - - auto storage_shard_ast = createASTStorageDistributed(shard_read_cluster_name, task_table.table_pull.first, task_table.table_pull.second); - - auto create_query_ast = removeAliasMaterializedAndTTLColumnsFromCreateQuery( - task_shard.current_pull_table_create_query, - task_table.allow_to_copy_alias_and_materialized_columns); - - auto create_table_pull_ast = rewriteCreateQueryStorage(create_query_ast, task_shard.table_read_shard, storage_shard_ast); - dropAndCreateLocalTable(create_table_pull_ast); - - if (create_split) - { - auto create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.main_table_split_shard, - task_table.main_engine_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - - /// Create auxiliary split tables for each piece - for (const auto & piece_number : collections::range(0, task_table.number_of_splits)) - { - const auto & storage_piece_split_ast = task_table.auxiliary_engine_split_asts[piece_number]; - - create_table_split_piece_ast = rewriteCreateQueryStorage( - create_query_ast, - task_shard.list_of_split_tables_on_shard[piece_number], - storage_piece_split_ast); - - dropAndCreateLocalTable(create_table_split_piece_ast); - } - } - -} - - -std::set ClusterCopier::getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) -{ - std::set res; - - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - const String & partition_name = queryToString(task_table.engine_push_partition_key_ast); - - if (partition_name == "'all'") - { - res.emplace("'all'"); - return res; - } - - String query; - { - WriteBufferFromOwnString wb; - wb << "SELECT " << partition_name << " AS partition FROM " - << getQuotedTable(task_shard.table_read_shard) << " GROUP BY partition ORDER BY partition DESC"; - query = wb.str(); - } - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, 
query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Computing destination partition set, executing query: {}", query); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - InterpreterSelectWithUnionQuery select(query_ast, local_context, SelectQueryOptions{}); - QueryPlan plan; - select.buildQueryPlan(plan); - auto builder = std::move(*plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(local_context), - BuildQueryPipelineSettings::fromContext(local_context))); - - Block block = getBlockWithAllStreamData(std::move(builder)); - - if (block) - { - ColumnWithTypeAndName & column = block.getByPosition(0); - task_shard.partition_key_column = column; - - for (size_t i = 0; i < column.column->size(); ++i) - { - WriteBufferFromOwnString wb; - column.type->getDefaultSerialization()->serializeTextQuoted(*column.column, i, wb, FormatSettings()); - res.emplace(wb.str()); - } - } - - LOG_INFO(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription()); - - return res; -} - -bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - - WriteBufferFromOwnString ss; - ss << "WITH " + partition_quoted_name + " AS partition_key "; - ss << "SELECT 1 FROM " << getQuotedTable(task_shard.table_read_shard); - ss << " WHERE (" << queryToString(task_table.engine_push_partition_key_ast) << " = partition_key)"; - if (!task_table.where_condition_str.empty()) - ss << " AND (" << task_table.where_condition_str << ")"; - ss << " LIMIT 1"; - auto query = ss.str(); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - LOG_INFO(log, "Checking shard {} for partition {} existence, executing query: {}", - task_shard.getDescription(), partition_quoted_name, query_ast->formatForErrorMessage()); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block block; - executor.pull(block); - return block.rows() != 0; -} - -bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number) -{ - createShardInternalTables(timeouts, task_shard, false); - - TaskTable & task_table = task_shard.task_table; - const size_t number_of_splits = task_table.number_of_splits; - const String & primary_key_comma_separated = task_table.primary_key_comma_separated; - - UNUSED(primary_key_comma_separated); - - std::string query; - - query += "WITH " + partition_quoted_name + " AS partition_key "; - query += "SELECT 1 FROM " + getQuotedTable(task_shard.table_read_shard); - - if (experimental_use_sample_offset) - query += " SAMPLE 1/" + toString(number_of_splits) + " OFFSET " + toString(current_piece_number) + "/" + toString(number_of_splits); - - query += " WHERE (" + queryToString(task_table.engine_push_partition_key_ast) + " = partition_key)"; - - if (!experimental_use_sample_offset) - query += " AND (cityHash64(" + 
primary_key_comma_separated + ") % " - + std::to_string(number_of_splits) + " = " + std::to_string(current_piece_number) + " )"; - - if (!task_table.where_condition_str.empty()) - query += " AND (" + task_table.where_condition_str + ")"; - - query += " LIMIT 1"; - - LOG_INFO(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query); - - ParserQuery parser_query(query.data() + query.size()); - const auto & settings = getContext()->getSettingsRef(); - ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth); - - auto local_context = Context::createCopy(context); - local_context->setSettings(task_cluster->settings_pull); - auto pipeline = InterpreterFactory::instance().get(query_ast, local_context)->execute().pipeline; - PullingPipelineExecutor executor(pipeline); - Block result; - executor.pull(result); - if (result.rows() != 0) - LOG_INFO(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - else - LOG_INFO(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription()); - return result.rows() != 0; -} - - -/** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ -UInt64 ClusterCopier::executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode) const -{ - ClusterPtr cluster_for_query = cluster; - if (execution_mode == ClusterExecutionMode::ON_EACH_NODE) - cluster_for_query = cluster->getClusterWithReplicasAsShards(current_settings); - - std::vector> connections; - connections.reserve(cluster->getShardCount()); - - std::atomic successfully_executed = 0; - - for (const auto & replicas : cluster_for_query->getShardsAddresses()) - { - for (const auto & node : replicas) - { - try - { - connections.emplace_back(std::make_shared( - node.host_name, node.port, node.default_database, - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, - "ClusterCopier", node.compression, node.secure - )); - - /// We execute only Alter, Create and Drop queries. - const auto header = Block{}; - - /// For unknown reason global context is passed to IStorage::read() method - /// So, task_identifier is passed as constructor argument. It is more obvious. - auto remote_query_executor = std::make_shared( - *connections.back(), query, header, getContext(), - /*throttler=*/nullptr, Scalars(), Tables(), QueryProcessingStage::Complete); - - try - { - remote_query_executor->sendQuery(); - } - catch (...) - { - LOG_WARNING(log, "Node with address {} seems to be unreachable.", node.host_name); - continue; - } - - while (true) - { - auto block = remote_query_executor->readBlock(); - if (!block) - break; - } - - remote_query_executor->finish(); - ++successfully_executed; - break; - } - catch (...) 
- { - LOG_WARNING(log, "An error occurred while processing query: {}", query); - tryLogCurrentException(log); - continue; - } - } - } - - return successfully_executed.load(); -} - -} diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h deleted file mode 100644 index 01f8b30f546..00000000000 --- a/programs/copier/ClusterCopier.h +++ /dev/null @@ -1,240 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "TaskCluster.h" -#include "TaskShard.h" -#include "TaskTable.h" -#include "ShardPartition.h" -#include "ShardPartitionPiece.h" -#include "ZooKeeperStaff.h" - - -namespace DB -{ - -class ClusterCopier : WithMutableContext -{ -public: - ClusterCopier(const String & task_path_, - const String & host_id_, - const String & proxy_database_name_, - ContextMutablePtr context_, - LoggerRawPtr log_) - : WithMutableContext(context_), - task_zookeeper_path(task_path_), - host_id(host_id_), - working_database_name(proxy_database_name_), - log(log_) {} - - void init(); - - template - decltype(auto) retry(T && func, UInt64 max_tries = 100); - - void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard); - - /// Compute set of partitions, assume set of partitions aren't changed during the processing - void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0); - - void uploadTaskDescription(const std::string & task_path, const std::string & task_file, bool force); - - void reloadTaskDescription(); - - void updateConfigIfNeeded(); - - void process(const ConnectionTimeouts & timeouts); - - /// Disables DROP PARTITION commands that used to clear data after errors - void setSafeMode(bool is_safe_mode_ = true) - { - is_safe_mode = is_safe_mode_; - } - - void setCopyFaultProbability(double copy_fault_probability_) - { - copy_fault_probability = copy_fault_probability_; - } - - void setMoveFaultProbability(double move_fault_probability_) - { - move_fault_probability = move_fault_probability_; - } - - void setExperimentalUseSampleOffset(bool value) - { - experimental_use_sample_offset = value; - } - - void setMaxTableTries(UInt64 tries) - { - max_table_tries = tries; - } - void setMaxShardPartitionTries(UInt64 tries) - { - max_shard_partition_tries = tries; - } - void setMaxShardPartitionPieceTriesForAlter(UInt64 tries) - { - max_shard_partition_piece_tries_for_alter = tries; - } - void setRetryDelayMs(std::chrono::milliseconds ms) - { - retry_delay_ms = ms; - } - -protected: - - String getWorkersPath() const - { - return task_cluster->task_zookeeper_path + "/task_active_workers"; - } - - String getWorkersPathVersion() const - { - return getWorkersPath() + "_version"; - } - - String getCurrentWorkerNodePath() const - { - return getWorkersPath() + "/" + host_id; - } - - zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed( - const zkutil::ZooKeeperPtr & zookeeper, - const String & description, - bool unprioritized); - - /* - * Checks that partition piece or some other entity is clean. - * The only requirement is that you have to pass is_dirty_flag_path and is_dirty_cleaned_path to the function. - * And is_dirty_flag_path is a parent of is_dirty_cleaned_path. 
- * */ - static bool checkPartitionPieceIsClean( - const zkutil::ZooKeeperPtr & zookeeper, - const CleanStateClock & clean_state_clock, - const String & task_status_path); - - bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition); - - /** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock. - * State of some task could change during the processing. - * We have to ensure that all shards have the finished state and there is no dirty flag. - * Moreover, we have to check status twice and check zxid, because state can change during the checking. - */ - - /* The same as function above - * Assume that we don't know on which shards do we have partition certain piece. - * We'll check them all (I mean shards that contain the whole partition) - * And shards that don't have certain piece MUST mark that piece is_done true. - * */ - bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name, - size_t piece_number, const TasksShard & shards_with_partition); - - - /*Alter successful insertion to helping tables it will move all pieces to destination table*/ - TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name); - - /// Removes MATERIALIZED and ALIAS columns from create table query - static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns); - - bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number, - const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock); - - bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - - TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table); - /// Job for copying partition from particular shard. - TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - bool is_unprioritized_task); - - TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts, - ShardPartition & task_partition, - size_t current_piece_number, - bool is_unprioritized_task); - - void dropAndCreateLocalTable(const ASTPtr & create_ast); - - void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const; - - void dropHelpingTables(const TaskTable & task_table); - - void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number); - - /// Is used for usage less disk space. - /// After all pieces were successfully moved to original destination - /// table we can get rid of partition pieces (partitions in helping tables). - void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name); - - String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings); - - ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - /// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it. 
- void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true); - - std::set getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard); - - bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name); - - bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts, - TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number); - - /* - * This class is used in executeQueryOnCluster function - * You can execute query on each shard (no sense it is executed on each replica of a shard or not) - * or you can execute query on each replica on each shard. - * First mode is useful for INSERTS queries. - * */ - enum ClusterExecutionMode - { - ON_EACH_SHARD, - ON_EACH_NODE - }; - - /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster - * Returns number of shards for which at least one replica executed query successfully - */ - UInt64 executeQueryOnCluster( - const ClusterPtr & cluster, - const String & query, - const Settings & current_settings, - ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const; - -private: - String task_zookeeper_path; - String task_description_path; - String host_id; - String working_database_name; - - /// Auto update config stuff - UInt64 task_description_current_version = 1; - std::atomic task_description_version{1}; - Coordination::WatchCallback task_description_watch_callback; - /// ZooKeeper session used to set the callback - zkutil::ZooKeeperPtr task_description_watch_zookeeper; - - ConfigurationPtr task_cluster_initial_config; - ConfigurationPtr task_cluster_current_config; - - std::unique_ptr task_cluster; - - bool is_safe_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - - bool experimental_use_sample_offset{false}; - - LoggerRawPtr log; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; -}; -} diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp deleted file mode 100644 index fdf07dec61a..00000000000 --- a/programs/copier/ClusterCopierApp.cpp +++ /dev/null @@ -1,252 +0,0 @@ -#include "ClusterCopierApp.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace fs = std::filesystem; - -namespace DB -{ - -/// ClusterCopierApp - -void ClusterCopierApp::initialize(Poco::Util::Application & self) -{ - is_help = config().has("help"); - if (is_help) - return; - - config_xml_path = config().getString("config-file"); - task_path = config().getString("task-path"); - log_level = config().getString("log-level", "info"); - is_safe_mode = config().has("safe-mode"); - is_status_mode = config().has("status"); - if (config().has("copy-fault-probability")) - copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0); - if (config().has("move-fault-probability")) - move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0); - base_dir = (config().has("base-dir")) ? 
config().getString("base-dir") : fs::current_path().string(); - - max_table_tries = std::max(config().getUInt("max-table-tries", 3), 1); - max_shard_partition_tries = std::max(config().getUInt("max-shard-partition-tries", 3), 1); - max_shard_partition_piece_tries_for_alter = std::max(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1); - retry_delay_ms = std::chrono::milliseconds(std::max(config().getUInt("retry-delay-ms", 1000), 100)); - - if (config().has("experimental-use-sample-offset")) - experimental_use_sample_offset = config().getBool("experimental-use-sample-offset"); - - // process_id is '#_' - time_t timestamp = Poco::Timestamp().epochTime(); - auto curr_pid = Poco::Process::id(); - - process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid); - host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id; - process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id)); - fs::create_directories(process_path); - - /// Override variables for BaseDaemon - if (config().has("log-level")) - config().setString("logger.level", config().getString("log-level")); - - if (config().has("base-dir") || !config().has("logger.log")) - config().setString("logger.log", fs::path(process_path) / "log.log"); - - if (config().has("base-dir") || !config().has("logger.errorlog")) - config().setString("logger.errorlog", fs::path(process_path) / "log.err.log"); - - Base::initialize(self); -} - - -void ClusterCopierApp::handleHelp(const std::string &, const std::string &) -{ - uint16_t terminal_width = 0; - if (isatty(STDIN_FILENO)) - terminal_width = getTerminalWidth(); - - Poco::Util::HelpFormatter help_formatter(options()); - if (terminal_width) - help_formatter.setWidth(terminal_width); - help_formatter.setCommand(commandName()); - help_formatter.setHeader("Copies tables from one cluster to another"); - help_formatter.setUsage("--config-file --task-path "); - help_formatter.format(std::cerr); - help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/"); - - stopOptionsProcessing(); -} - - -void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options) -{ - Base::defineOptions(options); - - options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper") - .argument("task-path").binding("task-path")); - options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path") - .argument("task-file").binding("task-file")); - options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. 
Default is false.") - .argument("task-upload-force").binding("task-upload-force")); - options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors") - .binding("safe-mode")); - options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)") - .argument("copy-fault-probability").binding("copy-fault-probability")); - options.addOption(Poco::Util::Option("move-fault-probability", "", "the moving fails with specified probability (used to test partition state recovering)") - .argument("move-fault-probability").binding("move-fault-probability")); - options.addOption(Poco::Util::Option("log-level", "", "sets log level") - .argument("log-level").binding("log-level")); - options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories") - .argument("base-dir").binding("base-dir")); - options.addOption(Poco::Util::Option("experimental-use-sample-offset", "", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k") - .argument("experimental-use-sample-offset").binding("experimental-use-sample-offset")); - options.addOption(Poco::Util::Option("status", "", "Get for status for current execution").binding("status")); - - options.addOption(Poco::Util::Option("max-table-tries", "", "Number of tries for the copy table task") - .argument("max-table-tries").binding("max-table-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-tries", "", "Number of tries for the copy one partition task") - .argument("max-shard-partition-tries").binding("max-shard-partition-tries")); - options.addOption(Poco::Util::Option("max-shard-partition-piece-tries-for-alter", "", "Number of tries for final ALTER ATTACH to destination table") - .argument("max-shard-partition-piece-tries-for-alter").binding("max-shard-partition-piece-tries-for-alter")); - options.addOption(Poco::Util::Option("retry-delay-ms", "", "Delay between task retries") - .argument("retry-delay-ms").binding("retry-delay-ms")); - - using Me = std::decay_t; - options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help") - .callback(Poco::Util::OptionCallback(this, &Me::handleHelp))); -} - - -void ClusterCopierApp::mainImpl() -{ - /// Status command - { - if (is_status_mode) - { - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - auto zookeeper = context->getZooKeeper(); - auto status_json = zookeeper->get(task_path + "/status"); - - LOG_INFO(&logger(), "{}", status_json); - std::cout << status_json << std::endl; - - context->resetZooKeeper(); - return; - } - } - StatusFile status_file(process_path + "/status", StatusFile::write_full_info); - ThreadStatus thread_status; - - auto * log = &logger(); - LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision()); - - SharedContextHolder shared_context = Context::createShared(); - auto context = Context::createGlobal(shared_context.get()); - context->makeGlobalContext(); - SCOPE_EXIT_SAFE(context->shutdown()); - - context->setConfig(loaded_config.configuration); - context->setApplicationType(Context::ApplicationType::LOCAL); - context->setPath(process_path + "/"); - - 
registerInterpreters(); - registerFunctions(); - registerAggregateFunctions(); - registerTableFunctions(); - registerDatabases(); - registerStorages(); - registerDictionaries(); - registerDisks(/* global_skip_access_check= */ true); - registerFormats(); - - static const std::string default_database = "_local"; - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, context)); - context->setCurrentDatabase(default_database); - - /// Disable queries logging, since: - /// - There are bits that is not allowed for global context, like adding factories info (for the query_log) - /// - And anyway it is useless for copier. - context->setSetting("log_queries", false); - - auto local_context = Context::createCopy(context); - - /// Initialize query scope just in case. - CurrentThread::QueryScope query_scope(local_context); - - auto copier = std::make_unique( - task_path, host_id, default_database, local_context, log); - copier->setSafeMode(is_safe_mode); - copier->setCopyFaultProbability(copy_fault_probability); - copier->setMoveFaultProbability(move_fault_probability); - copier->setMaxTableTries(max_table_tries); - copier->setMaxShardPartitionTries(max_shard_partition_tries); - copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter); - copier->setRetryDelayMs(retry_delay_ms); - copier->setExperimentalUseSampleOffset(experimental_use_sample_offset); - - auto task_file = config().getString("task-file", ""); - if (!task_file.empty()) - copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); - - zkutil::validateZooKeeperConfig(config()); - - copier->init(); - copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); - - /// Reset ZooKeeper before removing ClusterCopier. - /// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object. - context->resetZooKeeper(); -} - - -int ClusterCopierApp::main(const std::vector &) -{ - if (is_help) - return 0; - - try - { - mainImpl(); - } - catch (...) - { - tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__); - auto code = getCurrentExceptionCode(); - - return (code) ? code : -1; - } - - return 0; -} - - -} - -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" - -int mainEntryClickHouseClusterCopier(int argc, char ** argv) -{ - try - { - DB::ClusterCopierApp app; - return app.run(argc, argv); - } - catch (...) - { - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; - auto code = DB::getCurrentExceptionCode(); - - return (code) ? code : -1; - } -} diff --git a/programs/copier/ClusterCopierApp.h b/programs/copier/ClusterCopierApp.h deleted file mode 100644 index 0ddc232381e..00000000000 --- a/programs/copier/ClusterCopierApp.h +++ /dev/null @@ -1,99 +0,0 @@ -#pragma once - -#include -#include - -#include "ClusterCopier.h" - -/* clickhouse cluster copier util - * Copies tables data from one cluster to new tables of other (possibly the same) cluster in distributed fault-tolerant manner. - * - * See overview in the docs: docs/en/utils/clickhouse-copier.md - * - * Implementation details: - * - * cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through - * Distributed table (to perform data resharding). So, worker job is a partition of a source shard. - * A job has three states: Active, Finished and Abandoned. 
Abandoned means that worker died and did not finish the job. - * - * If an error occurred during the copying (a worker failed or a worker did not finish the INSERT), then the whole partition (on - * all destination servers) should be dropped and refilled. So, copying entity is a partition of all destination shards. - * If a failure is detected a special /is_dirty node is created in ZooKeeper signalling that other workers copying the same partition - * should stop, after a refilling procedure should start. - * - * ZooKeeper task node has the following structure: - * /task/path_root - path passed in --task-path parameter - * /description - contains user-defined XML config of the task - * /task_active_workers - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation - * /server_fqdn#PID_timestamp - cluster-copier worker ID - * ... - * /tables - directory with table tasks - * /cluster.db.table1 - directory of table_hits task - * /partition1 - directory for partition1 - * /shards - directory for source cluster shards - * /1 - worker job for the first shard of partition1 of table test.hits - * Contains info about current status (Active or Finished) and worker ID. - * /2 - * ... - * /partition_active_workers - * /1 - for each job in /shards a corresponding ephemeral node created in /partition_active_workers - * It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in - * /partition_active_workers). - * Also, it is used to track active workers in the partition (when we need to refill the partition we do - * not DROP PARTITION while there are active workers) - * /2 - * ... - * /is_dirty - the node is set if some worker detected that an error occurred (the INSERT is failed or an Abandoned node is - * detected). If the node appeared workers in this partition should stop and start cleaning and refilling - * partition procedure. - * During this procedure a single 'cleaner' worker is selected. The worker waits for stopping all partition - * workers, removes /shards node, executes DROP PARTITION on each destination node and removes /is_dirty node. - * /cleaner- An ephemeral node used to select 'cleaner' worker. Contains ID of the worker. - * /cluster.db.table2 - * ... 
- */ - -namespace DB -{ - -class ClusterCopierApp : public BaseDaemon -{ -public: - - void initialize(Poco::Util::Application & self) override; - - void handleHelp(const std::string &, const std::string &); - - void defineOptions(Poco::Util::OptionSet & options) override; - - int main(const std::vector &) override; - -private: - - using Base = BaseDaemon; - - void mainImpl(); - - std::string config_xml_path; - std::string task_path; - std::string log_level = "info"; - bool is_safe_mode = false; - bool is_status_mode = false; - double copy_fault_probability = 0.0; - double move_fault_probability = 0.0; - bool is_help = false; - - UInt64 max_table_tries = 3; - UInt64 max_shard_partition_tries = 3; - UInt64 max_shard_partition_piece_tries_for_alter = 10; - std::chrono::milliseconds retry_delay_ms{1000}; - - bool experimental_use_sample_offset{false}; - - std::string base_dir; - std::string process_path; - std::string process_id; - std::string host_id; -}; - -} diff --git a/programs/copier/ClusterPartition.h b/programs/copier/ClusterPartition.h deleted file mode 100644 index 22063989e22..00000000000 --- a/programs/copier/ClusterPartition.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// Contains info about all shards that contain a partition -struct ClusterPartition -{ - double elapsed_time_seconds = 0; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - UInt64 blocks_copied = 0; - - UInt64 total_tries = 0; -}; - -using ClusterPartitions = std::map>; - -} diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp deleted file mode 100644 index dcd199c6b38..00000000000 --- a/programs/copier/Internals.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include "Internals.h" -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -using ConfigurationPtr = Poco::AutoPtr; - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data) -{ - std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::XML::InputSource input_source{ss}; - return {new Poco::Util::XMLConfiguration{&input_source}}; -} - -String getQuotedTable(const String & database, const String & table) -{ - if (database.empty()) - return backQuoteIfNeed(table); - - return backQuoteIfNeed(database) + "." 
+ backQuoteIfNeed(table); -} - -String getQuotedTable(const DatabaseAndTableName & db_and_table) -{ - return getQuotedTable(db_and_table.first, db_and_table.second); -} - - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast) -{ - auto args = std::make_shared(); - args->children.emplace_back(std::make_shared(cluster_name)); - args->children.emplace_back(std::make_shared(database)); - args->children.emplace_back(std::make_shared(table)); - if (sharding_key_ast) - args->children.emplace_back(sharding_key_ast); - - auto engine = std::make_shared(); - engine->name = "Distributed"; - engine->arguments = args; - - auto storage = std::make_shared(); - storage->set(storage->engine, engine); - - return storage; -} - - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder) -{ - builder.addTransform(std::make_shared( - builder.getHeader(), - std::numeric_limits::max(), - std::numeric_limits::max())); - - auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - Block block; - PullingPipelineExecutor executor(cur_pipeline); - executor.pull(block); - - return block; -} - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - return storage.partition_by || storage.order_by || storage.sample_by; -} - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (isExtendedDefinitionStorage(storage_ast)) - { - if (storage.partition_by) - return storage.partition_by->clone(); - - static const char * all = "all"; - return std::make_shared(Field(all, strlen(all))); - } - else - { - bool is_replicated = startsWith(engine.name, "Replicated"); - size_t min_args = is_replicated ? 3 : 1; - - if (!engine.arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); - - ASTPtr arguments_ast = engine.arguments->clone(); - ASTs & arguments = arguments_ast->children; - - if (arguments.size() < min_args) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); - - ASTPtr & month_arg = is_replicated ? 
arguments[2] : arguments[1]; - return makeASTFunction("toYYYYMM", month_arg->clone()); - } -} - -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.primary_key) - return storage.primary_key->clone(); - - return nullptr; -} - - -ASTPtr extractOrderBy(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); - } - - if (storage.order_by) - return storage.order_by->clone(); - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); -} - -/// Wraps only identifiers with backticks. -std::string wrapIdentifiersWithBackticks(const ASTPtr & root) -{ - if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name()); - - if (auto function = std::dynamic_pointer_cast(root)) - return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; - - if (auto expression_list = std::dynamic_pointer_cast(root)) - { - Names function_arguments(expression_list->children.size()); - for (size_t i = 0; i < expression_list->children.size(); ++i) - function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[0]); - return boost::algorithm::join(function_arguments, ", "); - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); -} - - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) -{ - const auto sorting_key_ast = extractOrderBy(storage_ast); - const auto primary_key_ast = extractPrimaryKey(storage_ast); - - const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); - const auto primary_key_expr_list = primary_key_ast - ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); - - /// Maybe we have to handle VersionedCollapsing engine separately. But in our case in looks pointless. - - size_t primary_key_size = primary_key_expr_list->children.size(); - size_t sorting_key_size = sorting_key_expr_list->children.size(); - - if (primary_key_size > sorting_key_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " - "{} is greater than the sorting key length: {}", - primary_key_size, sorting_key_size); - - Names primary_key_columns; - NameSet primary_key_columns_set; - - for (size_t i = 0; i < sorting_key_size; ++i) - { - /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. - /// We will wrap identifiers with backticks to allow non-standard identifier names. 
- String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); - - if (i < primary_key_size) - { - String pk_column = primary_key_expr_list->children[i]->getColumnName(); - if (pk_column != sorting_key_column) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Primary key must be a prefix of the sorting key, " - "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); - - if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); - - primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); - } - } - - return primary_key_columns; -} - -bool isReplicatedTableEngine(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - String storage_str = queryToString(storage_ast); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - return startsWith(engine.name, "Replicated"); -} - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random) -{ - ShardPriority res; - - if (replicas.empty()) - return res; - - res.is_remote = 1; - for (const auto & replica : replicas) - { - if (isLocalAddress(DNSResolver::instance().resolveHostAllInOriginOrder(replica.host_name).front())) - { - res.is_remote = 0; - break; - } - } - - res.hostname_difference = std::numeric_limits::max(); - for (const auto & replica : replicas) - { - size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name); - res.hostname_difference = std::min(difference, res.hostname_difference); - } - - res.random = random; - return res; -} - -} diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h deleted file mode 100644 index 27fedd5d9e8..00000000000 --- a/programs/copier/Internals.h +++ /dev/null @@ -1,198 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Aliases.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data); - -String getQuotedTable(const String & database, const String & table); - -String getQuotedTable(const DatabaseAndTableName & db_and_table); - - -enum class TaskState -{ - Started = 0, - Finished, - Unknown -}; - -/// Used to mark status of shard partition tasks -struct TaskStateWithOwner -{ - TaskStateWithOwner() = default; - - TaskStateWithOwner(TaskState state_, const String & owner_) : state(state_), owner(owner_) {} - - TaskState state{TaskState::Unknown}; - String owner; - - static String getData(TaskState state, const String &owner) - { - return TaskStateWithOwner(state, owner).toString(); - } - - String toString() const - { - WriteBufferFromOwnString wb; - 
wb << static_cast(state) << "\n" << escape << owner; - return wb.str(); - } - - static TaskStateWithOwner fromString(const String & data) - { - ReadBufferFromString rb(data); - TaskStateWithOwner res; - UInt32 state; - - rb >> state >> "\n" >> escape >> res.owner; - - if (state >= static_cast(TaskState::Unknown)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data); - - res.state = static_cast(state); - return res; - } -}; - - -struct ShardPriority -{ - UInt8 is_remote = 1; - size_t hostname_difference = 0; - UInt8 random = 0; - - static bool greaterPriority(const ShardPriority & current, const ShardPriority & other) - { - return std::forward_as_tuple(current.is_remote, current.hostname_difference, current.random) - < std::forward_as_tuple(other.is_remote, other.hostname_difference, other.random); - } -}; - -/// Execution status of a task. -/// Is used for: partition copying task status, partition piece copying task status, partition moving task status. -enum class TaskStatus -{ - Active, - Finished, - Error, -}; - -struct MultiTransactionInfo -{ - int32_t code; - Coordination::Requests requests; - Coordination::Responses responses; -}; - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast = nullptr); - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder); - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast); - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast); - -/* -* Choosing a Primary Key that Differs from the Sorting Key -* It is possible to specify a primary key (an expression with values that are written in the index file for each mark) -* that is different from the sorting key (an expression for sorting the rows in data parts). -* In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. -* This feature is helpful when using the SummingMergeTree and AggregatingMergeTree table engines. -* In a common case when using these engines, the table has two types of columns: dimensions and measures. -* Typical queries aggregate values of measure columns with arbitrary GROUP BY and filtering by dimensions. -* Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, -* it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns -* and this list must be frequently updated with newly added dimensions. -* In this case it makes sense to leave only a few columns in the primary key that will provide efficient -* range scans and add the remaining dimension columns to the sorting key tuple. -* ALTER of the sorting key is a lightweight operation because when a new column is simultaneously added t -* o the table and to the sorting key, existing data parts don't need to be changed. -* Since the old sorting key is a prefix of the new sorting key and there is no data in the newly added column, -* the data is sorted by both the old and new sorting keys at the moment of table modification. 
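The comment above states the invariant that extractPrimaryKeyColumnNames() (shown earlier in this patch) enforces: the primary key expression tuple must be a prefix of the sorting key expression tuple. A minimal standalone C++ sketch of just that prefix rule, with made-up column names and plain string vectors standing in for AST nodes:

#include <cassert>
#include <string>
#include <vector>

/// Returns true if primary_key is a (possibly shorter) prefix of sorting_key.
static bool isPrefixOfSortingKey(const std::vector<std::string> & primary_key,
                                 const std::vector<std::string> & sorting_key)
{
    if (primary_key.size() > sorting_key.size())
        return false;
    for (size_t i = 0; i < primary_key.size(); ++i)
        if (primary_key[i] != sorting_key[i])
            return false;
    return true;
}

int main()
{
    /// e.g. ORDER BY (CounterID, EventDate, intHash32(UserID)), PRIMARY KEY (CounterID, EventDate)
    std::vector<std::string> sorting_key{"CounterID", "EventDate", "intHash32(UserID)"};
    std::vector<std::string> primary_key{"CounterID", "EventDate"};
    assert(isPrefixOfSortingKey(primary_key, sorting_key));

    /// A key that diverges from the sorting key (or is longer than it) must be rejected.
    assert(!isPrefixOfSortingKey({"EventDate"}, sorting_key));
    return 0;
}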
-* -* */ -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast); - -ASTPtr extractOrderBy(const ASTPtr & storage_ast); - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast); - -bool isReplicatedTableEngine(const ASTPtr & storage_ast); - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random); - -} diff --git a/programs/copier/ShardPartition.cpp b/programs/copier/ShardPartition.cpp deleted file mode 100644 index 4c962fc807d..00000000000 --- a/programs/copier/ShardPartition.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "ShardPartition.h" - -#include "TaskShard.h" -#include "TaskTable.h" - -namespace DB -{ - -ShardPartition::ShardPartition(TaskShard & parent, String name_quoted_, size_t number_of_splits) - : task_shard(parent) - , name(std::move(name_quoted_)) -{ - pieces.reserve(number_of_splits); -} - -String ShardPartition::getPartitionCleanStartPath() const -{ - return getPartitionPath() + "/clean_start"; -} - -String ShardPartition::getPartitionPieceCleanStartPath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return getPartitionPiecePath(current_piece_number) + "/clean_start"; -} - -String ShardPartition::getPartitionPath() const -{ - return task_shard.task_table.getPartitionPath(name); -} - -String ShardPartition::getPartitionPiecePath(size_t current_piece_number) const -{ - assert(current_piece_number < task_shard.task_table.number_of_splits); - return task_shard.task_table.getPartitionPiecePath(name, current_piece_number); -} - -String ShardPartition::getShardStatusPath() const -{ - // schema: //tables///shards/ - // e.g. /root/table_test.hits/201701/shards/1 - return getPartitionShardsPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getPartitionShardsPath() const -{ - return getPartitionPath() + "/shards"; -} - -String ShardPartition::getPartitionActiveWorkersPath() const -{ - return getPartitionPath() + "/partition_active_workers"; -} - -String ShardPartition::getActiveWorkerPath() const -{ - return getPartitionActiveWorkersPath() + "/" + toString(task_shard.numberInCluster()); -} - -String ShardPartition::getCommonPartitionIsDirtyPath() const -{ - return getPartitionPath() + "/is_dirty"; -} - -String ShardPartition::getCommonPartitionIsCleanedPath() const -{ - return getCommonPartitionIsDirtyPath() + "/cleaned"; -} - -} diff --git a/programs/copier/ShardPartition.h b/programs/copier/ShardPartition.h deleted file mode 100644 index 2457213733c..00000000000 --- a/programs/copier/ShardPartition.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once - -#include "ShardPartitionPiece.h" - -#include - -#include - -namespace DB -{ - -struct TaskShard; - -/// Just destination partition of a shard -/// I don't know what this comment means. -/// In short, when we discovered what shards contain currently processing partition, -/// This class describes a partition (name) that is stored on the shard (parent). 
-struct ShardPartition -{ - ShardPartition(TaskShard &parent, String name_quoted_, size_t number_of_splits = 10); - - String getPartitionPath() const; - - String getPartitionPiecePath(size_t current_piece_number) const; - - String getPartitionCleanStartPath() const; - - String getPartitionPieceCleanStartPath(size_t current_piece_number) const; - - String getCommonPartitionIsDirtyPath() const; - - String getCommonPartitionIsCleanedPath() const; - - String getPartitionActiveWorkersPath() const; - - String getActiveWorkerPath() const; - - String getPartitionShardsPath() const; - - String getShardStatusPath() const; - - /// What partition pieces are present in current shard. - /// FYI: Piece is a part of partition which has modulo equals to concrete constant (less than number_of_splits obliously) - /// For example SELECT ... from ... WHERE partition=current_partition AND cityHash64(*) == const; - /// Absent pieces have field is_absent_piece equals to true. - PartitionPieces pieces; - - TaskShard & task_shard; - String name; -}; - -using TasksPartition = std::map>; - -} diff --git a/programs/copier/ShardPartitionPiece.cpp b/programs/copier/ShardPartitionPiece.cpp deleted file mode 100644 index 36d1621e012..00000000000 --- a/programs/copier/ShardPartitionPiece.cpp +++ /dev/null @@ -1,64 +0,0 @@ -#include "ShardPartitionPiece.h" - -#include "ShardPartition.h" -#include "TaskShard.h" - -#include - -namespace DB -{ - -ShardPartitionPiece::ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_) - : is_absent_piece(!is_present_piece_) - , current_piece_number(current_piece_number_) - , shard_partition(parent) -{ -} - -String ShardPartitionPiece::getPartitionPiecePath() const -{ - return shard_partition.getPartitionPath() + "/piece_" + toString(current_piece_number); -} - -String ShardPartitionPiece::getPartitionPieceCleanStartPath() const -{ - return getPartitionPiecePath() + "/clean_start"; -} - -String ShardPartitionPiece::getPartitionPieceIsDirtyPath() const -{ - return getPartitionPiecePath() + "/is_dirty"; -} - -String ShardPartitionPiece::getPartitionPieceIsCleanedPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaned"; -} - -String ShardPartitionPiece::getPartitionPieceActiveWorkersPath() const -{ - return getPartitionPiecePath() + "/partition_piece_active_workers"; -} - -String ShardPartitionPiece::getActiveWorkerPath() const -{ - return getPartitionPieceActiveWorkersPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -/// On what shards do we have current partition. 
-String ShardPartitionPiece::getPartitionPieceShardsPath() const -{ - return getPartitionPiecePath() + "/shards"; -} - -String ShardPartitionPiece::getShardStatusPath() const -{ - return getPartitionPieceShardsPath() + "/" + toString(shard_partition.task_shard.numberInCluster()); -} - -String ShardPartitionPiece::getPartitionPieceCleanerPath() const -{ - return getPartitionPieceIsDirtyPath() + "/cleaner"; -} - -} diff --git a/programs/copier/ShardPartitionPiece.h b/programs/copier/ShardPartitionPiece.h deleted file mode 100644 index 453364c0fc8..00000000000 --- a/programs/copier/ShardPartitionPiece.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include - -#include - -namespace DB -{ - -struct ShardPartition; - -struct ShardPartitionPiece -{ - ShardPartitionPiece(ShardPartition & parent, size_t current_piece_number_, bool is_present_piece_); - - String getPartitionPiecePath() const; - - String getPartitionPieceCleanStartPath() const; - - String getPartitionPieceIsDirtyPath() const; - - String getPartitionPieceIsCleanedPath() const; - - String getPartitionPieceActiveWorkersPath() const; - - String getActiveWorkerPath() const ; - - /// On what shards do we have current partition. - String getPartitionPieceShardsPath() const; - - String getShardStatusPath() const; - - String getPartitionPieceCleanerPath() const; - - bool is_absent_piece; - const size_t current_piece_number; - - ShardPartition & shard_partition; -}; - -using PartitionPieces = std::vector; - -} diff --git a/programs/copier/StatusAccumulator.cpp b/programs/copier/StatusAccumulator.cpp deleted file mode 100644 index 77adeac708c..00000000000 --- a/programs/copier/StatusAccumulator.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "StatusAccumulator.h" - -#include -#include -#include -#include - -#include - -namespace DB -{ - -StatusAccumulator::MapPtr StatusAccumulator::fromJSON(String state_json) -{ - Poco::JSON::Parser parser; - auto state = parser.parse(state_json).extract(); - MapPtr result_ptr = std::make_shared(); - for (const auto & table_name : state->getNames()) - { - auto table_status_json = state->getValue(table_name); - auto table_status = parser.parse(table_status_json).extract(); - /// Map entry will be created if it is absent - auto & map_table_status = (*result_ptr)[table_name]; - map_table_status.all_partitions_count += table_status->getValue("all_partitions_count"); - map_table_status.processed_partitions_count += table_status->getValue("processed_partitions_count"); - } - return result_ptr; -} - -String StatusAccumulator::serializeToJSON(MapPtr statuses) -{ - Poco::JSON::Object result_json; - for (const auto & [table_name, table_status] : *statuses) - { - Poco::JSON::Object status_json; - status_json.set("all_partitions_count", table_status.all_partitions_count); - status_json.set("processed_partitions_count", table_status.processed_partitions_count); - - result_json.set(table_name, status_json); - } - std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - oss.exceptions(std::ios::failbit); - Poco::JSON::Stringifier::stringify(result_json, oss); - auto result = oss.str(); - return result; -} - -} diff --git a/programs/copier/StatusAccumulator.h b/programs/copier/StatusAccumulator.h deleted file mode 100644 index d420b611602..00000000000 --- a/programs/copier/StatusAccumulator.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -#include -#include - -namespace DB -{ - -class StatusAccumulator -{ -public: - struct TableStatus - { - size_t all_partitions_count; - size_t processed_partitions_count; 
- }; - - using Map = std::unordered_map; - using MapPtr = std::shared_ptr; - - static MapPtr fromJSON(String state_json); - static String serializeToJSON(MapPtr statuses); -}; - -} diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp deleted file mode 100644 index 0fb06616e50..00000000000 --- a/programs/copier/TaskCluster.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "TaskCluster.h" - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -TaskCluster::TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_) - : task_zookeeper_path(task_zookeeper_path_) - , default_local_database(default_local_database_) -{} - -void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - clusters_prefix = prefix + "remote_servers"; - if (!config.has(clusters_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in {}", clusters_prefix); - - Poco::Util::AbstractConfiguration::Keys tables_keys; - config.keys(prefix + "tables", tables_keys); - - for (const auto & table_key : tables_keys) - { - table_tasks.emplace_back(*this, config, prefix + "tables", table_key); - } -} - -void DB::TaskCluster::reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key) -{ - String prefix = base_key.empty() ? "" : base_key + "."; - - max_workers = config.getUInt64(prefix + "max_workers"); - - settings_common = Settings(); - if (config.has(prefix + "settings")) - settings_common.loadSettingsFromConfig(prefix + "settings", config); - - settings_common.prefer_localhost_replica = false; - - settings_pull = settings_common; - if (config.has(prefix + "settings_pull")) - settings_pull.loadSettingsFromConfig(prefix + "settings_pull", config); - - settings_push = settings_common; - if (config.has(prefix + "settings_push")) - settings_push.loadSettingsFromConfig(prefix + "settings_push", config); - - auto set_default_value = [] (auto && setting, auto && default_value) - { - setting = setting.changed ? setting.value : default_value; - }; - - /// Override important settings - settings_pull.readonly = 1; - settings_pull.prefer_localhost_replica = false; - settings_push.distributed_foreground_insert = true; - settings_push.prefer_localhost_replica = false; - - set_default_value(settings_pull.load_balancing, LoadBalancing::NEAREST_HOSTNAME); - set_default_value(settings_pull.max_threads, 1); - set_default_value(settings_pull.max_block_size, 8192UL); - set_default_value(settings_pull.preferred_block_size_bytes, 0); - - set_default_value(settings_push.distributed_background_insert_timeout, 0); - set_default_value(settings_push.alter_sync, 2); -} - -} - diff --git a/programs/copier/TaskCluster.h b/programs/copier/TaskCluster.h deleted file mode 100644 index a7f8bc3baca..00000000000 --- a/programs/copier/TaskCluster.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include "TaskTable.h" - -#include -#include - -#include - -#include - -namespace DB -{ - -struct TaskCluster -{ - TaskCluster(const String & task_zookeeper_path_, const String & default_local_database_); - - void loadTasks(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); - - /// Set (or update) settings and max_workers param - void reloadSettings(const Poco::Util::AbstractConfiguration & config, const String & base_key = ""); - - /// Base node for all tasks. 
Its structure: - /// workers/ - directory with active workers (amount of them is less or equal max_workers) - /// description - node with task configuration - /// table_table1/ - directories with per-partition copying status - String task_zookeeper_path; - - /// Database used to create temporary Distributed tables - String default_local_database; - - /// Limits number of simultaneous workers - UInt64 max_workers = 0; - - /// Base settings for pull and push - Settings settings_common; - /// Settings used to fetch data - Settings settings_pull; - /// Settings used to insert data - Settings settings_push; - - String clusters_prefix; - - /// Subtasks - TasksTable table_tasks; - - pcg64 random_engine; -}; - -} diff --git a/programs/copier/TaskShard.cpp b/programs/copier/TaskShard.cpp deleted file mode 100644 index d156f451a84..00000000000 --- a/programs/copier/TaskShard.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "TaskShard.h" - -#include "TaskTable.h" - -namespace DB -{ - -TaskShard::TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_) - : task_table(parent) - , info(info_) -{ - list_of_split_tables_on_shard.assign(task_table.number_of_splits, DatabaseAndTableName()); -} - -UInt32 TaskShard::numberInCluster() const -{ - return info.shard_num; -} - -UInt32 TaskShard::indexInCluster() const -{ - return info.shard_num - 1; -} - -String DB::TaskShard::getDescription() const -{ - return fmt::format("N{} (having a replica {}, pull table {} of cluster {}", - numberInCluster(), getHostNameExample(), getQuotedTable(task_table.table_pull), task_table.cluster_pull_name); -} - -String DB::TaskShard::getHostNameExample() const -{ - const auto & replicas = task_table.cluster_pull->getShardsAddresses().at(indexInCluster()); - return replicas.at(0).readableString(); -} - -} diff --git a/programs/copier/TaskShard.h b/programs/copier/TaskShard.h deleted file mode 100644 index 05d652077ea..00000000000 --- a/programs/copier/TaskShard.h +++ /dev/null @@ -1,56 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "Internals.h" -#include "ClusterPartition.h" -#include "ShardPartition.h" - - -namespace DB -{ - -struct TaskTable; - -struct TaskShard -{ - TaskShard(TaskTable & parent, const Cluster::ShardInfo & info_); - - TaskTable & task_table; - - Cluster::ShardInfo info; - - UInt32 numberInCluster() const; - - UInt32 indexInCluster() const; - - String getDescription() const; - - String getHostNameExample() const; - - /// Used to sort clusters by their proximity - ShardPriority priority; - - /// Column with unique destination partitions (computed from engine_push_partition_key expr.) 
in the shard - ColumnWithTypeAndName partition_key_column; - - /// There is a task for each destination partition - TasksPartition partition_tasks; - - /// Which partitions have been checked for existence - /// If some partition from this lists is exists, it is in partition_tasks - std::set checked_partitions; - - /// Last CREATE TABLE query of the table of the shard - ASTPtr current_pull_table_create_query; - ASTPtr current_push_table_create_query; - - /// Internal distributed tables - DatabaseAndTableName table_read_shard; - DatabaseAndTableName main_table_split_shard; - ListOfDatabasesAndTableNames list_of_split_tables_on_shard; -}; - -using TaskShardPtr = std::shared_ptr; -using TasksShard = std::vector; - -} diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp deleted file mode 100644 index d055ceb4c7b..00000000000 --- a/programs/copier/TaskTable.cpp +++ /dev/null @@ -1,222 +0,0 @@ -#include "TaskTable.h" - -#include "ClusterPartition.h" -#include "TaskCluster.h" - -#include -#include - -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int LOGICAL_ERROR; -} - -TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, - const String & prefix_, const String & table_key) - : task_cluster(parent) -{ - String table_prefix = prefix_ + "." + table_key + "."; - - name_in_config = table_key; - - number_of_splits = config.getUInt64(table_prefix + "number_of_splits", 3); - - allow_to_copy_alias_and_materialized_columns = config.getBool(table_prefix + "allow_to_copy_alias_and_materialized_columns", false); - allow_to_drop_target_partitions = config.getBool(table_prefix + "allow_to_drop_target_partitions", false); - - cluster_pull_name = config.getString(table_prefix + "cluster_pull"); - cluster_push_name = config.getString(table_prefix + "cluster_push"); - - table_pull.first = config.getString(table_prefix + "database_pull"); - table_pull.second = config.getString(table_prefix + "table_pull"); - - table_push.first = config.getString(table_prefix + "database_push"); - table_push.second = config.getString(table_prefix + "table_push"); - - /// Used as node name in ZooKeeper - table_id = escapeForFileName(cluster_push_name) - + "." + escapeForFileName(table_push.first) - + "." 
+ escapeForFileName(table_push.second); - - engine_push_str = config.getString(table_prefix + "engine", "rand()"); - - { - ParserStorage parser_storage{ParserStorage::TABLE_ENGINE}; - engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - engine_push_partition_key_ast = extractPartitionKey(engine_push_ast); - primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", "); - is_replicated_table = isReplicatedTableEngine(engine_push_ast); - } - - sharding_key_str = config.getString(table_prefix + "sharding_key"); - - auxiliary_engine_split_asts.reserve(number_of_splits); - { - ParserExpressionWithOptionalAlias parser_expression(false); - sharding_key_ast = parseQuery(parser_expression, sharding_key_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second, - sharding_key_ast); - - for (const auto piece_number : collections::range(0, number_of_splits)) - { - auxiliary_engine_split_asts.emplace_back - ( - createASTStorageDistributed(cluster_push_name, table_push.first, - table_push.second + "_piece_" + toString(piece_number), sharding_key_ast) - ); - } - } - - where_condition_str = config.getString(table_prefix + "where_condition", ""); - if (!where_condition_str.empty()) - { - ParserExpressionWithOptionalAlias parser_expression(false); - where_condition_ast = parseQuery(parser_expression, where_condition_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - // Will use canonical expression form - where_condition_str = queryToString(where_condition_ast); - } - - String enabled_partitions_prefix = table_prefix + "enabled_partitions"; - has_enabled_partitions = config.has(enabled_partitions_prefix); - - if (has_enabled_partitions) - { - Strings keys; - config.keys(enabled_partitions_prefix, keys); - - if (keys.empty()) - { - /// Parse list of partition from space-separated string - String partitions_str = config.getString(table_prefix + "enabled_partitions"); - boost::trim_if(partitions_str, isWhitespaceASCII); - boost::split(enabled_partitions, partitions_str, isWhitespaceASCII, boost::token_compress_on); - } - else - { - /// Parse sequence of ... - for (const String &key : keys) - { - if (!startsWith(key, "partition")) - throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix); - - enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." 
+ key)); - } - } - - std::copy(enabled_partitions.begin(), enabled_partitions.end(), std::inserter(enabled_partitions_set, enabled_partitions_set.begin())); - } -} - - -String TaskTable::getPartitionPath(const String & partition_name) const -{ - return task_cluster.task_zookeeper_path // root - + "/tables/" + table_id // tables/dst_cluster.merge.hits - + "/" + escapeForFileName(partition_name); // 201701 -} - -String TaskTable::getPartitionAttachIsActivePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_active"; -} - -String TaskTable::getPartitionAttachIsDonePath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/attach_is_done"; -} - -String TaskTable::getPartitionPiecePath(const String & partition_name, size_t piece_number) const -{ - assert(piece_number < number_of_splits); - return getPartitionPath(partition_name) + "/piece_" + toString(piece_number); // 1...number_of_splits -} - -String TaskTable::getCertainPartitionIsDirtyPath(const String &partition_name) const -{ - return getPartitionPath(partition_name) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionPieceIsDirtyPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/is_dirty"; -} - -String TaskTable::getCertainPartitionIsCleanedPath(const String & partition_name) const -{ - return getCertainPartitionIsDirtyPath(partition_name) + "/cleaned"; -} - -String TaskTable::getCertainPartitionPieceIsCleanedPath(const String & partition_name, const size_t piece_number) const -{ - return getCertainPartitionPieceIsDirtyPath(partition_name, piece_number) + "/cleaned"; -} - -String TaskTable::getCertainPartitionTaskStatusPath(const String & partition_name) const -{ - return getPartitionPath(partition_name) + "/shards"; -} - -String TaskTable::getCertainPartitionPieceTaskStatusPath(const String & partition_name, const size_t piece_number) const -{ - return getPartitionPiecePath(partition_name, piece_number) + "/shards"; -} - -bool TaskTable::isReplicatedTable() const -{ - return is_replicated_table; -} - -String TaskTable::getStatusAllPartitionCount() const -{ - return task_cluster.task_zookeeper_path + "/status/all_partitions_count"; -} - -String TaskTable::getStatusProcessedPartitionsCount() const -{ - return task_cluster.task_zookeeper_path + "/status/processed_partitions_count"; -} - -ASTPtr TaskTable::rewriteReplicatedCreateQueryToPlain() const -{ - ASTPtr prev_engine_push_ast = engine_push_ast->clone(); - - auto & new_storage_ast = prev_engine_push_ast->as(); - auto & new_engine_ast = new_storage_ast.engine->as(); - - /// Remove "Replicated" from name - new_engine_ast.name = new_engine_ast.name.substr(10); - - if (new_engine_ast.arguments) - { - auto & replicated_table_arguments = new_engine_ast.arguments->children; - - - /// In some cases of Atomic database engine usage ReplicatedMergeTree tables - /// could be created without arguments. - if (!replicated_table_arguments.empty()) - { - /// Delete first two arguments of Replicated...MergeTree() table. 
- replicated_table_arguments.erase(replicated_table_arguments.begin()); - replicated_table_arguments.erase(replicated_table_arguments.begin()); - } - } - - return new_storage_ast.clone(); -} - -ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) -{ - auto it = cluster_partitions.find(partition_name); - if (it == cluster_partitions.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id); - return it->second; -} - -} diff --git a/programs/copier/TaskTable.h b/programs/copier/TaskTable.h deleted file mode 100644 index 2bb7f078bc6..00000000000 --- a/programs/copier/TaskTable.h +++ /dev/null @@ -1,173 +0,0 @@ -#pragma once - -#include "Aliases.h" -#include "TaskShard.h" - - -namespace DB -{ - -struct ClusterPartition; -struct TaskCluster; - -struct TaskTable -{ - TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfiguration & config, const String & prefix, const String & table_key); - - TaskCluster & task_cluster; - - /// These functions used in checkPartitionIsDone() or checkPartitionPieceIsDone() - /// They are implemented here not to call task_table.tasks_shard[partition_name].second.pieces[current_piece_number] etc. - - String getPartitionPath(const String & partition_name) const; - - String getPartitionAttachIsActivePath(const String & partition_name) const; - - String getPartitionAttachIsDonePath(const String & partition_name) const; - - String getPartitionPiecePath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsDirtyPath(const String & partition_name) const; - - String getCertainPartitionPieceIsDirtyPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionIsCleanedPath(const String & partition_name) const; - - String getCertainPartitionPieceIsCleanedPath(const String & partition_name, size_t piece_number) const; - - String getCertainPartitionTaskStatusPath(const String & partition_name) const; - - String getCertainPartitionPieceTaskStatusPath(const String & partition_name, size_t piece_number) const; - - bool isReplicatedTable() const; - - /// These nodes are used for check-status option - String getStatusAllPartitionCount() const; - String getStatusProcessedPartitionsCount() const; - - /// Partitions will be split into number-of-splits pieces. - /// Each piece will be copied independently. 
(10 by default) - size_t number_of_splits; - - bool allow_to_copy_alias_and_materialized_columns{false}; - bool allow_to_drop_target_partitions{false}; - - String name_in_config; - - /// Used as task ID - String table_id; - - /// Column names in primary key - String primary_key_comma_separated; - - /// Source cluster and table - String cluster_pull_name; - DatabaseAndTableName table_pull; - - /// Destination cluster and table - String cluster_push_name; - DatabaseAndTableName table_push; - - /// Storage of destination table - /// (tables that are stored on each shard of target cluster) - String engine_push_str; - ASTPtr engine_push_ast; - ASTPtr engine_push_partition_key_ast; - - /// First argument of Replicated...MergeTree() - String engine_push_zk_path; - bool is_replicated_table; - - ASTPtr rewriteReplicatedCreateQueryToPlain() const; - - /* - * A Distributed table definition used to split data - * Distributed table will be created on each shard of default - * cluster to perform data copying and resharding - * */ - String sharding_key_str; - ASTPtr sharding_key_ast; - ASTPtr main_engine_split_ast; - - /* - * To copy partition piece form one cluster to another we have to use Distributed table. - * In case of usage separate table (engine_push) for each partition piece, - * we have to use many Distributed tables. - * */ - ASTs auxiliary_engine_split_asts; - - /// Additional WHERE expression to filter input data - String where_condition_str; - ASTPtr where_condition_ast; - - /// Resolved clusters - ClusterPtr cluster_pull; - ClusterPtr cluster_push; - - /// Filter partitions that should be copied - bool has_enabled_partitions = false; - Strings enabled_partitions; - NameSet enabled_partitions_set; - - /** - * Prioritized list of shards - * all_shards contains information about all shards in the table. - * So we have to check whether particular shard have current partition or not while processing. - */ - TasksShard all_shards; - TasksShard local_shards; - - /// All partitions of the current table. 
- ClusterPartitions cluster_partitions; - NameSet finished_cluster_partitions; - - /// Partition names to process in user-specified order - Strings ordered_partition_names; - - ClusterPartition & getClusterPartition(const String & partition_name); - - Stopwatch watch; - UInt64 bytes_copied = 0; - UInt64 rows_copied = 0; - - template - void initShards(RandomEngine &&random_engine); -}; - -using TasksTable = std::list; - - -template -inline void TaskTable::initShards(RandomEngine && random_engine) -{ - const String & fqdn_name = getFQDNOrHostName(); - std::uniform_int_distribution get_urand(0, std::numeric_limits::max()); - - // Compute the priority - for (const auto & shard_info : cluster_pull->getShardsInfo()) - { - TaskShardPtr task_shard = std::make_shared(*this, shard_info); - const auto & replicas = cluster_pull->getShardsAddresses().at(task_shard->indexInCluster()); - task_shard->priority = getReplicasPriority(replicas, fqdn_name, get_urand(random_engine)); - - all_shards.emplace_back(task_shard); - } - - // Sort by priority - std::sort(all_shards.begin(), all_shards.end(), - [](const TaskShardPtr & lhs, const TaskShardPtr & rhs) - { - return ShardPriority::greaterPriority(lhs->priority, rhs->priority); - }); - - // Cut local shards - auto it_first_remote = std::lower_bound(all_shards.begin(), all_shards.end(), 1, - [](const TaskShardPtr & lhs, UInt8 is_remote) - { - return lhs->priority.is_remote < is_remote; - }); - - local_shards.assign(all_shards.begin(), it_first_remote); -} - -} diff --git a/programs/copier/ZooKeeperStaff.h b/programs/copier/ZooKeeperStaff.h deleted file mode 100644 index c15db73f060..00000000000 --- a/programs/copier/ZooKeeperStaff.h +++ /dev/null @@ -1,221 +0,0 @@ -#pragma once - -/** Allows to compare two incremental counters of type UInt32 in presence of possible overflow. - * We assume that we compare values that are not too far away. - * For example, when we increment 0xFFFFFFFF, we get 0. So, 0xFFFFFFFF is less than 0. - */ -class WrappingUInt32 -{ -public: - UInt32 value; - - explicit WrappingUInt32(UInt32 _value) - : value(_value) - {} - - bool operator<(const WrappingUInt32 & other) const - { - return value != other.value && *this <= other; - } - - bool operator<=(const WrappingUInt32 & other) const - { - const UInt32 HALF = static_cast(1) << 31; - return (value <= other.value && other.value - value < HALF) - || (value > other.value && value - other.value > HALF); - } - - bool operator==(const WrappingUInt32 & other) const - { - return value == other.value; - } -}; - -/** Conforming Zxid definition. - * cf. https://github.com/apache/zookeeper/blob/631d1b284f0edb1c4f6b0fb221bf2428aec71aaa/zookeeper-docs/src/main/resources/markdown/zookeeperInternals.md#guarantees-properties-and-definitions - * - * But it is better to read this: https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html - * - * Actually here is the definition of Zxid. - * Every change to the ZooKeeper state receives a stamp in the form of a zxid (ZooKeeper Transaction Id). - * This exposes the total ordering of all changes to ZooKeeper. Each change will have a unique zxid - * and if zxid1 is smaller than zxid2 then zxid1 happened before zxid2. 
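A minimal standalone C++ sketch of the wrap-around rule that WrappingUInt32 above encodes, restated on plain unsigned integers; the helper name and sample values are illustrative only, and the assumption is the same as in the comment: the two counters are never more than 2^31 apart.

#include <cassert>
#include <cstdint>

/// True if a "is less than or equal to" b under wrap-around (modulo 2^32) semantics.
static bool wrappingLessOrEqual(std::uint32_t a, std::uint32_t b)
{
    const std::uint32_t HALF = 1u << 31;
    return (a <= b && b - a < HALF) || (a > b && a - b > HALF);
}

int main()
{
    /// Incrementing 0xFFFFFFFF overflows to 0, so 0xFFFFFFFF is treated as less than 0.
    assert(wrappingLessOrEqual(0xFFFFFFFFu, 0u));
    assert(!wrappingLessOrEqual(0u, 0xFFFFFFFFu));
    /// Nearby values that do not cross the overflow point compare as usual.
    assert(wrappingLessOrEqual(5u, 6u));
    return 0;
}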
- */ -class Zxid -{ -public: - WrappingUInt32 epoch; - WrappingUInt32 counter; - explicit Zxid(UInt64 _zxid) - : epoch(static_cast(_zxid >> 32)) - , counter(static_cast(_zxid)) - {} - - bool operator<=(const Zxid & other) const - { - return (epoch < other.epoch) - || (epoch == other.epoch && counter <= other.counter); - } - - bool operator==(const Zxid & other) const - { - return epoch == other.epoch && counter == other.counter; - } -}; - -/* When multiple ClusterCopiers discover that the target partition is not empty, - * they will attempt to clean up this partition before proceeding to copying. - * - * Instead of purging is_dirty, the history of cleaning work is preserved and partition hygiene is established - * based on a happens-before relation between the events. - * This relation is encoded by LogicalClock based on the mzxid of the is_dirty ZNode and is_dirty/cleaned. - * The fact of the partition hygiene is encoded by CleanStateClock. - * - * For you to know what mzxid means: - * - * ZooKeeper Stat Structure: - * The Stat structure for each znode in ZooKeeper is made up of the following fields: - * - * -- czxid - * The zxid of the change that caused this znode to be created. - * - * -- mzxid - * The zxid of the change that last modified this znode. - * - * -- ctime - * The time in milliseconds from epoch when this znode was created. - * - * -- mtime - * The time in milliseconds from epoch when this znode was last modified. - * - * -- version - * The number of changes to the data of this znode. - * - * -- cversion - * The number of changes to the children of this znode. - * - * -- aversion - * The number of changes to the ACL of this znode. - * - * -- ephemeralOwner - * The session id of the owner of this znode if the znode is an ephemeral node. - * If it is not an ephemeral node, it will be zero. - * - * -- dataLength - * The length of the data field of this znode. - * - * -- numChildren - * The number of children of this znode. 
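A minimal standalone C++ model of the happens-before rule this comment describes and that LogicalClock below implements; std::optional<std::uint64_t> stands in for the Zxid type, and the sample mzxid values are made up for illustration.

#include <cassert>
#include <cstdint>
#include <optional>

/// "No event observed" happens before anything; otherwise compare transaction ids.
static bool happensBefore(const std::optional<std::uint64_t> & lhs, const std::optional<std::uint64_t> & rhs)
{
    return !lhs || (rhs && *lhs <= *rhs);
}

int main()
{
    std::optional<std::uint64_t> discovery = 100;   /// e.g. mzxid of the is_dirty node
    std::optional<std::uint64_t> cleaned   = 150;   /// e.g. mzxid of is_dirty/cleaned

    /// The partition counts as clean when the discovery event happens before the cleaning event.
    assert(happensBefore(discovery, cleaned));
    assert(!happensBefore(cleaned, discovery));
    assert(happensBefore(std::nullopt, discovery));
    return 0;
}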
- * */ - -class LogicalClock -{ -public: - std::optional zxid; - - LogicalClock() = default; - - explicit LogicalClock(UInt64 _zxid) - : zxid(_zxid) - {} - - bool hasHappened() const - { - return bool(zxid); - } - - /// happens-before relation with a reasonable time bound - bool happensBefore(const LogicalClock & other) const - { - return !zxid - || (other.zxid && *zxid <= *other.zxid); - } - - bool operator<=(const LogicalClock & other) const - { - return happensBefore(other); - } - - /// strict equality check - bool operator==(const LogicalClock & other) const - { - return zxid == other.zxid; - } -}; - - -class CleanStateClock -{ -public: - LogicalClock discovery_zxid; - std::optional discovery_version; - - LogicalClock clean_state_zxid; - std::optional clean_state_version; - - std::shared_ptr stale; - - bool is_clean() const - { - return !is_stale() - && (!discovery_zxid.hasHappened() || (clean_state_zxid.hasHappened() && discovery_zxid <= clean_state_zxid)); - } - - bool is_stale() const - { - return stale->load(); - } - - CleanStateClock( - const zkutil::ZooKeeperPtr & zookeeper, - const String & discovery_path, - const String & clean_state_path) - : stale(std::make_shared(false)) - { - Coordination::Stat stat{}; - String _some_data; - auto watch_callback = - [my_stale = stale] (const Coordination::WatchResponse & rsp) - { - auto logger = getLogger("ClusterCopier"); - if (rsp.error == Coordination::Error::ZOK) - { - switch (rsp.type) /// NOLINT(bugprone-switch-missing-default-case) - { - case Coordination::CREATED: - LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path); - my_stale->store(true); - break; - case Coordination::CHANGED: - LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path); - my_stale->store(true); - } - } - }; - if (zookeeper->tryGetWatch(discovery_path, _some_data, &stat, watch_callback)) - { - discovery_zxid = LogicalClock(stat.mzxid); - discovery_version = stat.version; - } - if (zookeeper->tryGetWatch(clean_state_path, _some_data, &stat, watch_callback)) - { - clean_state_zxid = LogicalClock(stat.mzxid); - clean_state_version = stat.version; - } - } - - bool operator==(const CleanStateClock & other) const - { - return !is_stale() - && !other.is_stale() - && discovery_zxid == other.discovery_zxid - && discovery_version == other.discovery_version - && clean_state_zxid == other.clean_state_zxid - && clean_state_version == other.clean_state_version; - } - - bool operator!=(const CleanStateClock & other) const - { - return !(*this == other); - } -}; diff --git a/programs/copier/clickhouse-copier.cpp b/programs/copier/clickhouse-copier.cpp deleted file mode 100644 index 4dabb01775b..00000000000 --- a/programs/copier/clickhouse-copier.cpp +++ /dev/null @@ -1 +0,0 @@ -int mainEntryClickHouseClusterCopier(int argc, char ** argv); diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index ae09d207091..eb7c70cf498 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -94,7 +94,7 @@ 8123 - - - system -
query_log
- - - 1000 - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs/config-copier.xml b/tests/integration/test_cluster_copier/configs/config-copier.xml deleted file mode 100644 index 590b1892f8d..00000000000 --- a/tests/integration/test_cluster_copier/configs/config-copier.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - diff --git a/tests/integration/test_cluster_copier/configs/users.xml b/tests/integration/test_cluster_copier/configs/users.xml deleted file mode 100644 index b463dfc81e7..00000000000 --- a/tests/integration/test_cluster_copier/configs/users.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - 1 - - 5 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml deleted file mode 100644 index 9de7b57de27..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml deleted file mode 100644 index d0cab0fafb7..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/config-copier.xml +++ /dev/null @@ -1,27 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - - zoo2 - 2181 - - - zoo3 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml b/tests/integration/test_cluster_copier/configs_three_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_three_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml deleted file mode 100644 index 38d88308631..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/clusters.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml deleted file mode 100644 index 64fa32335ab..00000000000 --- 
a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/ddl.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - /clickhouse/task_queue/ddl - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml b/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml deleted file mode 100644 index 8306f40ad6a..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/conf.d/storage_configuration.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - - /jbod1/ - - - /jbod2/ - - - /external/ - - - - - - - - external - -
- jbod1 - jbod2 -
-
-
-
- -
- -
diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml b/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml deleted file mode 100644 index 55bd24816ae..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/config-copier.xml +++ /dev/null @@ -1,19 +0,0 @@ - - - information - /var/log/clickhouse-server/copier/log.log - /var/log/clickhouse-server/copier/log.err.log - 1000M - 10 - /var/log/clickhouse-server/copier/stderr.log - /var/log/clickhouse-server/copier/stdout.log - - - - - zoo1 - 2181 - - 2000 - - diff --git a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml b/tests/integration/test_cluster_copier/configs_two_nodes/users.xml deleted file mode 100644 index badaf46a5ca..00000000000 --- a/tests/integration/test_cluster_copier/configs_two_nodes/users.xml +++ /dev/null @@ -1,32 +0,0 @@ - - - - 1 - 1 - - - - - - - - ::/0 - - default - default - - - 12345678 - - ::/0 - - default - default - - - - - - - - diff --git a/tests/integration/test_cluster_copier/task0_description.xml b/tests/integration/test_cluster_copier/task0_description.xml deleted file mode 100644 index 8d74d0bdde0..00000000000 --- a/tests/integration/test_cluster_copier/task0_description.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - cluster0 - default - hits - - cluster1 - default - hits - - 2 - - 3 4 5 6 1 2 0 - - - ENGINE=ReplicatedMergeTree PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_drop_target_partition.xml b/tests/integration/test_cluster_copier/task_drop_target_partition.xml deleted file mode 100644 index dc8e6452243..00000000000 --- a/tests/integration/test_cluster_copier/task_drop_target_partition.xml +++ /dev/null @@ -1,41 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_drop_target_partition - source - - destination - db_drop_target_partition - destination - - true - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_month_to_week_description.xml b/tests/integration/test_cluster_copier/task_month_to_week_description.xml deleted file mode 100644 index bc290ca397f..00000000000 --- a/tests/integration/test_cluster_copier/task_month_to_week_description.xml +++ /dev/null @@ -1,99 +0,0 @@ - - - 4 - - - - 1 - 2 - - - - 0 - - - - - - cluster0 - default - a - - cluster1 - default - b - - - - 2 - - - ENGINE= - ReplicatedMergeTree - PARTITION BY toMonday(date) - ORDER BY d - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - 255.255.255.255 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_no_arg.xml b/tests/integration/test_cluster_copier/task_no_arg.xml deleted file mode 100644 index 262ff073537..00000000000 --- a/tests/integration/test_cluster_copier/task_no_arg.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 
- 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree PARTITION BY date ORDER BY (date, sipHash64(date)) SAMPLE BY sipHash64(date) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_no_index.xml b/tests/integration/test_cluster_copier/task_no_index.xml deleted file mode 100644 index 265f99e21a6..00000000000 --- a/tests/integration/test_cluster_copier/task_no_index.xml +++ /dev/null @@ -1,109 +0,0 @@ - - - - - false - - s0_0_0 - 9000 - - - - - - - false - - s1_1_0 - 9000 - - - - - - - 2 - - - - 1 - - - - - 0 - - - - - 3 - - 1 - - - - - - - - source_cluster - default - ontime - - - - destination_cluster - default - ontime22 - - - - - - - ENGINE = MergeTree() PARTITION BY Year ORDER BY (Year, FlightDate) SETTINGS index_granularity=8192 - - - - - jumpConsistentHash(intHash64(Year), 2) - - - - - - - 2017 - - - - - - - diff --git a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml b/tests/integration/test_cluster_copier/task_non_partitioned_table.xml deleted file mode 100644 index d5424b95f45..00000000000 --- a/tests/integration/test_cluster_copier/task_non_partitioned_table.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - 1 - - s0_0_0 - 9000 - - - - - - - 1 - - s1_1_0 - 9000 - - - - - - 1 - - - - source_cluster - default - copier_test1 - - default_cluster - default - copier_test1_1 - ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_self_copy.xml b/tests/integration/test_cluster_copier/task_self_copy.xml deleted file mode 100644 index 21d577bc397..00000000000 --- a/tests/integration/test_cluster_copier/task_self_copy.xml +++ /dev/null @@ -1,63 +0,0 @@ - - 9440 - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - - false - - s0_0_0 - 9000 - dbuser - 12345678 - 0 - - - - - - 2 - - - 1 - - - - 0 - - - - 3 - 1 - - - - - source_cluster - db1 - source_table - - destination_cluster - db2 - destination_table - - - ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192 - - - rand() - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_skip_index.xml b/tests/integration/test_cluster_copier/task_skip_index.xml deleted file mode 100644 index b04cec963d4..00000000000 --- a/tests/integration/test_cluster_copier/task_skip_index.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_skip_index - source - - destination - db_skip_index - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_taxi_data.xml b/tests/integration/test_cluster_copier/task_taxi_data.xml deleted file mode 100644 index 94fa5087338..00000000000 --- a/tests/integration/test_cluster_copier/task_taxi_data.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - - false - - first - 9000 - - - - false - - second - 9000 - - - - false - - third - 9000 - - - - - - 2 - - - - events - dailyhistory - yellow_tripdata_staging - events - monthlyhistory - yellow_tripdata_staging - Engine=ReplacingMergeTree() PRIMARY KEY (tpep_pickup_datetime, id) ORDER BY (tpep_pickup_datetime, id) PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime)) - sipHash64(id) % 3 - - - \ No newline at end of file diff --git a/tests/integration/test_cluster_copier/task_test_block_size.xml 
b/tests/integration/test_cluster_copier/task_test_block_size.xml deleted file mode 100644 index bf29c7e1832..00000000000 --- a/tests/integration/test_cluster_copier/task_test_block_size.xml +++ /dev/null @@ -1,101 +0,0 @@ - - - 1 - - - - 1 - - - - - - - - - shard_0_0 - default - test_block_size - - cluster1 - default - test_block_size - - - '1970-01-01' - - - - ENGINE= - ReplicatedMergeTree - ORDER BY d PARTITION BY partition - - - - jumpConsistentHash(intHash64(d), 2) - - - - - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - true - - s0_1_0 - 9000 - - - - - - - true - - s1_0_0 - 9000 - - - s1_0_1 - 9000 - - - - true - - s1_1_0 - 9000 - - - - - - - true - - s0_0_0 - 9000 - - - s0_0_1 - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial.xml b/tests/integration/test_cluster_copier/task_trivial.xml deleted file mode 100644 index a3b8bc03888..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial - - destination_trivial_cluster - default - trivial - - - ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster{cluster}/{shard}/hits', '{replica}') PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml b/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml deleted file mode 100644 index 0197dee0181..00000000000 --- a/tests/integration/test_cluster_copier/task_trivial_without_arguments.xml +++ /dev/null @@ -1,63 +0,0 @@ - - - 3 - - - - 1 - - - - - 0 - - - - - - - - - - source_trivial_cluster - default - trivial_without_arguments - - destination_trivial_cluster - default - trivial_without_arguments - - - ENGINE=ReplicatedMergeTree() PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16 - - - d + 1 - - - d - d = 0 - - - - - - - - - first_trivial - 9000 - - - - - - - - - second_trivial - 9000 - - - - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_columns.xml b/tests/integration/test_cluster_copier/task_ttl_columns.xml deleted file mode 100644 index 2069c509c87..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_columns.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_ttl_columns - source - - destination - db_ttl_columns - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) - rand() - - - diff --git a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml b/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml deleted file mode 100644 index 2a51fa7a66d..00000000000 --- a/tests/integration/test_cluster_copier/task_ttl_move_to_volume.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_move_to_volume - source - - destination - db_move_to_volume - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column3, Column2, Column1) TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' SETTINGS storage_policy = 'external_with_jbods' - rand() - - - diff --git 
a/tests/integration/test_cluster_copier/task_with_different_schema.xml b/tests/integration/test_cluster_copier/task_with_different_schema.xml deleted file mode 100644 index e1e6ee4dc42..00000000000 --- a/tests/integration/test_cluster_copier/task_with_different_schema.xml +++ /dev/null @@ -1,39 +0,0 @@ - - - - - false - - first_of_two - 9000 - - - - - - false - - second_of_two - 9000 - - - - - - 2 - - - - source - db_different_schema - source - - destination - db_different_schema - destination - - ENGINE = MergeTree() PARTITION BY toYYYYMMDD(Column3) ORDER BY (Column9, Column1, Column2, Column3, Column4) - rand() - - - diff --git a/tests/integration/test_cluster_copier/test.py b/tests/integration/test_cluster_copier/test.py deleted file mode 100644 index be71fc21e33..00000000000 --- a/tests/integration/test_cluster_copier/test.py +++ /dev/null @@ -1,653 +0,0 @@ -import os -import random -import sys -import time -import kazoo -import pytest -import string -import random -from contextlib import contextmanager -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -COPYING_FAIL_PROBABILITY = 0.2 -MOVING_FAIL_PROBABILITY = 0.2 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -def check_all_hosts_sucesfully_executed(tsv_content, num_hosts): - M = TSV.toMat(tsv_content) - hosts = [(l[0], l[1]) for l in M] # (host, port) - codes = [l[2] for l in M] - - assert len(hosts) == num_hosts and len(set(hosts)) == num_hosts, "\n" + tsv_content - assert len(set(codes)) == 1, "\n" + tsv_content - assert codes[0] == "0", "\n" + tsv_content - - -def ddl_check_query(instance, query, num_hosts=3): - contents = instance.query(query) - check_all_hosts_sucesfully_executed(contents, num_hosts) - return contents - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - clusters_schema = { - "0": {"0": ["0", "1"], "1": ["0"]}, - "1": {"0": ["0", "1"], "1": ["0"]}, - } - - for cluster_name, shards in clusters_schema.items(): - for shard_name, replicas in shards.items(): - for replica_name in replicas: - name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name) - cluster.add_instance( - name, - main_configs=[ - "configs/conf.d/query_log.xml", - "configs/conf.d/ddl.xml", - "configs/conf.d/clusters.xml", - ], - user_configs=["configs/users.xml"], - macros={ - "cluster": cluster_name, - "shard": shard_name, - "replica": replica_name, - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task1: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_simple_" + generateRandomString(10) - self.container_task_file = "/task0_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task0_description.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{} SYNC".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE default ON CLUSTER cluster{} ".format(cluster_num), - ) - - ddl_check_query( - instance, - "CREATE TABLE hits ON CLUSTER cluster0 (d UInt64, d1 UInt64 MATERIALIZED d+1) " - + "ENGINE=ReplicatedMergeTree " - + "PARTITION BY d % 3 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster0 (d UInt64) ENGINE=Distributed(cluster0, default, hits, d)", - ) - ddl_check_query( - instance, - "CREATE TABLE hits_all ON CLUSTER cluster1 (d UInt64) ENGINE=Distributed(cluster1, default, hits, d + 1)", - ) - instance.query( - "INSERT INTO hits_all SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert ( - self.cluster.instances["s0_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT count() FROM hits_all") - .strip() - == "1002" - ) - - assert ( - self.cluster.instances["s1_0_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "1" - ) - assert ( - self.cluster.instances["s1_1_0"] - .query("SELECT DISTINCT d % 2 FROM hits") - .strip() - == "0" - ) - - instance = self.cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits_all ON CLUSTER cluster1") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE hits ON CLUSTER cluster1") - - -class Task2: - def __init__(self, cluster, unique_zk_path): - self.cluster = cluster - self.zk_task_path = ( - "/clickhouse-copier/task_month_to_week_partition_" + generateRandomString(5) - ) - self.unique_zk_path = generateRandomString(10) - self.container_task_file = "/task_month_to_week_description.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_month_to_week_description.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - for cluster_num in ["0", "1"]: - ddl_check_query( - instance, - "DROP DATABASE IF EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - ddl_check_query( - instance, - "CREATE DATABASE IF NOT EXISTS default ON CLUSTER cluster{}".format( - cluster_num - ), - ) - - ddl_check_query( - instance, - "CREATE TABLE a ON CLUSTER cluster0 (date Date, d UInt64, d1 UInt64 ALIAS d+1) " - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/cluster_{cluster}/{shard}/" - + self.unique_zk_path - + "', " - "'{replica}', date, intHash64(d), (date, intHash64(d)), 8192)", - ) - ddl_check_query( - instance, - "CREATE TABLE a_all ON CLUSTER cluster0 (date Date, d UInt64) ENGINE=Distributed(cluster0, default, a, d)", - ) - - instance.query( - "INSERT INTO a_all SELECT toDate(17581 + number) AS date, number AS d FROM system.numbers LIMIT 85", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - assert TSV( - self.cluster.instances["s0_0_0"].query( - "SELECT count() FROM cluster(cluster0, default, a)" - ) - ) == TSV("85\n") - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count(), uniqExact(date) FROM cluster(cluster1, default, b)" - ) - ) == TSV("85\t85\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) == TSV("0\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT DISTINCT jumpConsistentHash(intHash64(d), 2) FROM b" - ) - ) == TSV("1\n") - - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - assert TSV( - self.cluster.instances["s1_1_0"].query( - "SELECT uniqExact(partition) IN (12, 13) FROM system.parts WHERE active AND database='default' AND table='b'" - ) - ) == TSV("1\n") - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE a ON CLUSTER cluster0") - ddl_check_query(instance, "DROP TABLE b ON CLUSTER cluster1") - - -class Task_test_block_size: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = ( - "/clickhouse-copier/task_test_block_size_" + generateRandomString(5) - ) - self.rows = 1000000 - self.container_task_file = "/task_test_block_size.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_test_block_size.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - - ddl_check_query( - instance, - """ - CREATE TABLE test_block_size ON CLUSTER shard_0_0 (partition Date, d UInt64) - ENGINE=ReplicatedMergeTree - ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d)""", - 2, - ) - - instance.query( - "INSERT INTO test_block_size SELECT toDate(0) AS partition, number as d FROM system.numbers LIMIT {}".format( - self.rows - ) - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_0_0"].query( - "SELECT count() FROM cluster(cluster1, default, test_block_size)" - ) - ) == TSV("{}\n".format(self.rows)) - - instance = cluster.instances["s0_0_0"] - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER shard_0_0", 2) - ddl_check_query(instance, "DROP TABLE test_block_size ON CLUSTER cluster1") - - -class Task_no_index: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_index_" + generateRandomString( - 5 - ) - self.rows = 1000000 - self.container_task_file = "/task_no_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS ontime SYNC") - instance.query( - "create table IF NOT EXISTS ontime (Year UInt16, FlightDate String) ENGINE = Memory" - ) - instance.query( - "insert into ontime values (2016, 'test6'), (2017, 'test7'), (2018, 'test8')" - ) - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT Year FROM ontime22") - ) == TSV("2017\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE ontime") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE ontime22") - - -class Task_no_arg: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_no_arg" - self.rows = 1000000 - self.container_task_file = "/task_no_arg.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_no_arg.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table if not exists copier_test1 (date Date, id UInt32) engine = MergeTree PARTITION BY date ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1 SYNC") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1 SYNC") - - -class Task_non_partitioned_table: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_non_partitoned_table" - self.rows = 1000000 - self.container_task_file = "/task_non_partitioned_table.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_non_partitioned_table.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE IF EXISTS copier_test1 SYNC") - instance.query( - "create table copier_test1 (date Date, id UInt32) engine = MergeTree ORDER BY date SETTINGS index_granularity = 8192" - ) - instance.query("insert into copier_test1 values ('2016-01-01', 10);") - - def check(self): - assert TSV( - self.cluster.instances["s1_1_0"].query("SELECT date FROM copier_test1_1") - ) == TSV("2016-01-01\n") - instance = cluster.instances["s0_0_0"] - instance.query("DROP TABLE copier_test1") - instance = cluster.instances["s1_1_0"] - instance.query("DROP TABLE copier_test1_1") - - -class Task_self_copy: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_self_copy" - self.container_task_file = "/task_self_copy.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_self_copy.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - instance.query("CREATE DATABASE IF NOT EXISTS db1;") - instance.query( - "CREATE TABLE IF NOT EXISTS db1.source_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("CREATE DATABASE IF NOT EXISTS db2;") - instance.query( - "CREATE TABLE IF NOT EXISTS db2.destination_table (`a` Int8, `b` String, `c` Int8) ENGINE = MergeTree PARTITION BY a ORDER BY a SETTINGS index_granularity = 8192" - ) - instance.query("INSERT INTO db1.source_table VALUES (1, 'ClickHouse', 1);") - instance.query("INSERT INTO db1.source_table VALUES (2, 'Copier', 2);") - - def check(self): - instance = cluster.instances["s0_0_0"] - assert TSV( - instance.query("SELECT * FROM db2.destination_table ORDER BY a") - ) == TSV(instance.query("SELECT * FROM db1.source_table ORDER BY a")) - instance = cluster.instances["s0_0_0"] - instance.query("DROP DATABASE IF EXISTS db1 SYNC") - instance.query("DROP DATABASE IF EXISTS db2 SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. It is Ok in first test.") - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - copiers = random.sample(list(started_cluster.instances.keys()), 3) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_simple(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - ["--experimental-use-sample-offset", "1"], - ) - else: - execute_task(started_cluster, Task1(started_cluster), []) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - "--max-table-tries", - "10", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - -@pytest.mark.parametrize(("use_sample_offset"), [False, True]) -def test_copy_with_recovering_after_move_faults(started_cluster, use_sample_offset): - if use_sample_offset: - execute_task( - started_cluster, - Task1(started_cluster), - [ - "--move-fault-probability", - str(MOVING_FAIL_PROBABILITY), - "--experimental-use-sample-offset", - "1", - ], - ) - else: - execute_task( - started_cluster, - Task1(started_cluster), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition(started_cluster): - execute_task(started_cluster, Task2(started_cluster, "test1"), []) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering(started_cluster): - execute_task( - started_cluster, - Task2(started_cluster, "test2"), - [ - "--copy-fault-probability", - str(COPYING_FAIL_PROBABILITY), - "--max-table-tries", - "10", - ], - ) - - -@pytest.mark.timeout(600) -def test_copy_month_to_week_partition_with_recovering_after_move_faults( - started_cluster, -): - execute_task( - started_cluster, - Task2(started_cluster, "test3"), - ["--move-fault-probability", str(MOVING_FAIL_PROBABILITY)], - ) - - -def test_block_size(started_cluster): - execute_task(started_cluster, Task_test_block_size(started_cluster), []) - - -def test_no_index(started_cluster): - execute_task(started_cluster, Task_no_index(started_cluster), []) - - -def test_no_arg(started_cluster): - execute_task(started_cluster, Task_no_arg(started_cluster), []) - - -def test_non_partitioned_table(started_cluster): - execute_task(started_cluster, Task_non_partitioned_table(started_cluster), []) - - -def test_self_copy(started_cluster): - execute_task(started_cluster, Task_self_copy(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py deleted file mode 100644 index e7d07757adb..00000000000 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ /dev/null @@ -1,286 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first", "second", 
"third"]: - cluster.add_instance( - name, - main_configs=[ - "configs_three_nodes/conf.d/clusters.xml", - "configs_three_nodes/conf.d/ddl.xml", - ], - user_configs=["configs_three_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class Task: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task" - self.container_task_file = "/task_taxi_data.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_taxi_data.xml"), - self.container_task_file, - ) - logging.debug( - f"Copied task file to container of '{instance_name}' instance. Path {self.container_task_file}" - ) - - def start(self): - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - first = cluster.instances["first"] - - # daily partition database - first.query("CREATE DATABASE IF NOT EXISTS dailyhistory on cluster events;") - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (toYYYYMMDD(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE dailyhistory.yellow_tripdata - ON CLUSTER events - AS dailyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'dailyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""" - ) - - first.query( - """INSERT INTO dailyhistory.yellow_tripdata - SELECT * FROM generateRandom( - 'id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime(\\'UTC\\'), - tpep_dropoff_datetime DateTime(\\'UTC\\'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String', - 1, 10, 2) LIMIT 50;""" - ) - - # monthly partition database - first.query("create database IF NOT EXISTS monthlyhistory on cluster events;") - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata_staging ON CLUSTER events - ( - id UUID DEFAULT generateUUIDv4(), - vendor_id String, - tpep_pickup_datetime DateTime('UTC'), - tpep_dropoff_datetime DateTime('UTC'), - passenger_count Nullable(Float64), - trip_distance String, - pickup_longitude 
Float64, - pickup_latitude Float64, - rate_code_id String, - store_and_fwd_flag String, - dropoff_longitude Float64, - dropoff_latitude Float64, - payment_type String, - fare_amount String, - extra String, - mta_tax String, - tip_amount String, - tolls_amount String, - improvement_surcharge String, - total_amount String, - pickup_location_id String, - dropoff_location_id String, - congestion_surcharge String, - junk1 String, - junk2 String - ) - Engine = ReplacingMergeTree() - PRIMARY KEY (tpep_pickup_datetime, id) - ORDER BY (tpep_pickup_datetime, id) - PARTITION BY (pickup_location_id, toYYYYMM(tpep_pickup_datetime))""" - ) - - first.query( - """CREATE TABLE monthlyhistory.yellow_tripdata - ON CLUSTER events - AS monthlyhistory.yellow_tripdata_staging - ENGINE = Distributed('events', 'monthlyhistory', yellow_tripdata_staging, sipHash64(id) % 3);""" - ) - - def check(self): - first = cluster.instances["first"] - a = TSV(first.query("SELECT count() from dailyhistory.yellow_tripdata")) - b = TSV(first.query("SELECT count() from monthlyhistory.yellow_tripdata")) - assert a == b, "Distributed tables" - - for instance_name, instance in cluster.instances.items(): - instance = cluster.instances[instance_name] - a = instance.query( - "SELECT count() from dailyhistory.yellow_tripdata_staging" - ) - b = instance.query( - "SELECT count() from monthlyhistory.yellow_tripdata_staging" - ) - assert a == b, "MergeTree tables on each shard" - - a = TSV( - instance.query( - "SELECT sipHash64(*) from dailyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - b = TSV( - instance.query( - "SELECT sipHash64(*) from monthlyhistory.yellow_tripdata_staging ORDER BY id" - ) - ) - - assert a == b, "Data on each shard" - - for name in ["first", "second", "third"]: - node = cluster.instances[name] - node.query("DROP DATABASE IF EXISTS dailyhistory SYNC;") - node.query("DROP DATABASE IF EXISTS monthlyhistory SYNC;") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - logging.debug("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - logging.debug(f"execute_task cmd: {cmd}") - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs_three_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) 
- - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) - - -# Tests -@pytest.mark.timeout(600) -def test(started_cluster): - execute_task(started_cluster, Task(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_trivial.py b/tests/integration/test_cluster_copier/test_trivial.py deleted file mode 100644 index b8060583ef8..00000000000 --- a/tests/integration/test_cluster_copier/test_trivial.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import sys -import time -import random -import string - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import kazoo -import pytest -import docker - - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - - -COPYING_FAIL_PROBABILITY = 0.1 -MOVING_FAIL_PROBABILITY = 0.1 - -cluster = ClickHouseCluster(__file__) - - -def generateRandomString(count): - return "".join( - random.choice(string.ascii_uppercase + string.digits) for _ in range(count) - ) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first_trivial", "second_trivial"]: - instance = cluster.add_instance( - name, - main_configs=["configs/conf.d/clusters_trivial.xml"], - user_configs=["configs_two_nodes/users.xml"], - macros={ - "cluster": name, - "shard": "the_only_shard", - "replica": "the_only_replica", - }, - with_zookeeper=True, - ) - - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -class TaskTrivial: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_trivial" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial (d UInt64, d1 UInt64 MATERIALIZED d+1)" - "ENGINE=ReplicatedMergeTree('/clickhouse/tables/source_trivial_cluster/1/trivial/{}', '1') " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16".format( - generateRandomString(10) - ) - ) - - source.query( - "INSERT INTO trivial SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV(source.query("SELECT count() FROM trivial")) == TSV("1002\n") - assert TSV(destination.query("SELECT count() FROM trivial")) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial") - - -class TaskReplicatedWithoutArguments: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_trivial_without_arguments" - self.copier_task_config = open( - os.path.join(CURRENT_TEST_DIR, "task_trivial_without_arguments.xml"), "r" - ).read() - - def start(self): - source = cluster.instances["first_trivial"] - destination = 
cluster.instances["second_trivial"] - - for node in [source, destination]: - node.query("DROP DATABASE IF EXISTS default") - node.query("CREATE DATABASE IF NOT EXISTS default") - - source.query( - "CREATE TABLE trivial_without_arguments ON CLUSTER source_trivial_cluster (d UInt64, d1 UInt64 MATERIALIZED d+1) " - "ENGINE=ReplicatedMergeTree() " - "PARTITION BY d % 5 ORDER BY (d, sipHash64(d)) SAMPLE BY sipHash64(d) SETTINGS index_granularity = 16" - ) - - source.query( - "INSERT INTO trivial_without_arguments SELECT * FROM system.numbers LIMIT 1002", - settings={"distributed_foreground_insert": 1}, - ) - - def check(self): - zk = cluster.get_kazoo_client("zoo1") - status_data, _ = zk.get(self.zk_task_path + "/status") - assert ( - status_data - == b'{"hits":{"all_partitions_count":5,"processed_partitions_count":5}}' - ) - - source = cluster.instances["first_trivial"] - destination = cluster.instances["second_trivial"] - - assert TSV( - source.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - assert TSV( - destination.query("SELECT count() FROM trivial_without_arguments") - ) == TSV("1002\n") - - for node in [source, destination]: - node.query("DROP TABLE trivial_without_arguments") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - try: - zk.delete("/clickhouse-copier", recursive=True) - except kazoo.exceptions.NoNodeError: - print("No node /clickhouse-copier. It is Ok in first test.") - - zk_task_path = task.zk_task_path - zk.ensure_path(zk_task_path) - zk.create(zk_task_path + "/description", task.copier_task_config.encode()) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - zk_task_path, - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - copiers = list(started_cluster.instances.keys()) - - for instance_name in copiers: - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - print("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - print(output) - copiers_exec_ids.append(exec_id) - print( - "Copier for {} ({}) has started".format(instance.name, instance.ip_address) - ) - - # Wait for copiers stopping and check their return codes - for exec_id, instance_name in zip(copiers_exec_ids, copiers): - instance = started_cluster.instances[instance_name] - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(0.5) - - assert res["ExitCode"] == 0, "Instance: {} ({}). 
Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(zk_task_path, recursive=True) - - -# Tests - - -def test_trivial_copy(started_cluster): - execute_task(started_cluster, TaskTrivial(started_cluster), []) - - -def test_trivial_without_arguments(started_cluster): - execute_task(started_cluster, TaskReplicatedWithoutArguments(started_cluster), []) diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py deleted file mode 100644 index 1bd3561f24f..00000000000 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ /dev/null @@ -1,597 +0,0 @@ -import os -import sys -import time -import logging -import pytest - -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -import docker - -CURRENT_TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -sys.path.insert(0, os.path.dirname(CURRENT_TEST_DIR)) - -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - global cluster - try: - for name in ["first_of_two", "second_of_two"]: - instance = cluster.add_instance( - name, - main_configs=[ - "configs_two_nodes/conf.d/clusters.xml", - "configs_two_nodes/conf.d/ddl.xml", - "configs_two_nodes/conf.d/storage_configuration.xml", - ], - user_configs=["configs_two_nodes/users.xml"], - with_zookeeper=True, - ) - - cluster.start() - - for name in ["first_of_two", "second_of_two"]: - instance = cluster.instances[name] - instance.exec_in_container(["bash", "-c", "mkdir /jbod1"]) - instance.exec_in_container(["bash", "-c", "mkdir /jbod2"]) - instance.exec_in_container(["bash", "-c", "mkdir /external"]) - - yield cluster - - finally: - cluster.shutdown() - - -# Will copy table from `first` node to `second` -class TaskWithDifferentSchema: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_with_different_schema" - self.container_task_file = "/task_with_different_schema.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_with_different_schema.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - first.query( - """CREATE TABLE db_different_schema.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String, - Column7 String, - Column8 String, - Column9 String, - Column10 String, - Column11 String, - Column12 Decimal(3, 1), - Column13 DateTime, - Column14 UInt16 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - ORDER BY (Column1, Column2, Column3, Column4, Column6, Column7, Column8, Column9) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_different_schema.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 String, Column8 String, Column9 String, Column10 String, - Column11 String, Column12 Decimal(3, 1), Column13 DateTime, Column14 UInt16', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_different_schema;") - second.query( - """CREATE TABLE db_different_schema.destination - ( - Column1 LowCardinality(String) CODEC(LZ4), - Column2 UInt32 CODEC(LZ4), - Column3 Date CODEC(DoubleDelta, LZ4), - Column4 DateTime CODEC(DoubleDelta, LZ4), - Column5 UInt16 CODEC(LZ4), - Column6 LowCardinality(String) CODEC(ZSTD), - Column7 LowCardinality(String) CODEC(ZSTD), - Column8 LowCardinality(String) CODEC(ZSTD), - Column9 LowCardinality(String) CODEC(ZSTD), - Column10 String CODEC(ZSTD(6)), - Column11 LowCardinality(String) CODEC(LZ4), - Column12 Decimal(3,1) CODEC(LZ4), - Column13 DateTime CODEC(DoubleDelta, LZ4), - Column14 UInt16 CODEC(LZ4) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column9, Column1, Column2, Column3, Column4);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_different_schema.source") - b = second.query("SELECT count() from db_different_schema.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_different_schema.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_different_schema.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8, Column9, Column10, Column11, Column12, Column13, Column14)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - second.query("DROP DATABASE IF EXISTS db_different_schema SYNC") - - -# Just simple copying, but table schema has TTL on columns -# Also table will have slightly different schema -class TaskTTL: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_columns" - self.container_task_file = "/task_ttl_columns.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - 
instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_ttl_columns.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - first.query( - """CREATE TABLE db_ttl_columns.source - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime, - Column5 UInt16, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) TTL now() + INTERVAL 1 MONTH - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_ttl_columns.source SELECT * FROM generateRandom( - 'Column1 String, Column2 UInt32, Column3 Date, Column4 DateTime, Column5 UInt16, - Column6 String, Column7 Decimal(3, 1), Column8 Tuple(Float64, Float64)', 1, 10, 2) LIMIT 50;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_ttl_columns;") - second.query( - """CREATE TABLE db_ttl_columns.destination - ( - Column1 String, - Column2 UInt32, - Column3 Date, - Column4 DateTime TTL now() + INTERVAL 1 MONTH, - Column5 UInt16 TTL now() + INTERVAL 1 MONTH, - Column6 String TTL now() + INTERVAL 1 MONTH, - Column7 Decimal(3, 1) TTL now() + INTERVAL 1 MONTH, - Column8 Tuple(Float64, Float64) - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_ttl_columns.source") - b = second.query("SELECT count() from db_ttl_columns.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_ttl_columns.source - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_ttl_columns.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5, Column6, Column7, Column8)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - second.query("DROP DATABASE IF EXISTS db_ttl_columns SYNC") - - -class TaskSkipIndex: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_skip_index" - self.container_task_file = "/task_skip_index.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_skip_index.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - first.query( - """CREATE TABLE db_skip_index.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - SETTINGS index_granularity = 8192""" - ) - - first.query( - """INSERT INTO db_skip_index.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_skip_index;") - second.query( - """CREATE TABLE db_skip_index.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String, - INDEX a (Column1 * Column2, Column5) TYPE minmax GRANULARITY 3, - INDEX b (Column1 * length(Column5)) TYPE set(1000) GRANULARITY 4 - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_skip_index.source") - b = second.query("SELECT count() from db_skip_index.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_skip_index.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_skip_index.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - second.query("DROP DATABASE IF EXISTS db_skip_index SYNC") - - -class TaskTTLMoveToVolume: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_ttl_move_to_volume" - self.container_task_file = "/task_ttl_move_to_volume.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_ttl_move_to_volume.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["first_of_two"] - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - first.query( - """CREATE TABLE db_move_to_volume.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - first.query( - """INSERT INTO db_move_to_volume.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_move_to_volume;") - second.query( - """CREATE TABLE db_move_to_volume.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1) - TTL Column3 + INTERVAL 1 MONTH TO VOLUME 'external' - SETTINGS storage_policy = 'external_with_jbods';""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_move_to_volume.source") - b = second.query("SELECT count() from db_move_to_volume.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_move_to_volume.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_move_to_volume.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - second.query("DROP DATABASE IF EXISTS db_move_to_volume SYNC") - - -class TaskDropTargetPartition: - def __init__(self, cluster): - self.cluster = cluster - self.zk_task_path = "/clickhouse-copier/task_drop_target_partition" - self.container_task_file = "/task_drop_target_partition.xml" - - for instance_name, _ in cluster.instances.items(): - instance = cluster.instances[instance_name] - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "./task_drop_target_partition.xml"), - self.container_task_file, - ) - print( - "Copied task file to container of '{}' instance. 
Path {}".format( - instance_name, self.container_task_file - ) - ) - - def start(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - first.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - first.query( - """CREATE TABLE db_drop_target_partition.source - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) - ENGINE = MergeTree() - PARTITION BY (toYYYYMMDD(Column3), Column3) - PRIMARY KEY (Column1, Column2, Column3) - ORDER BY (Column1, Column2, Column3);""" - ) - - first.query( - """INSERT INTO db_drop_target_partition.source SELECT * FROM generateRandom( - 'Column1 UInt64, Column2 Int32, Column3 Date, Column4 DateTime, Column5 String', 1, 10, 2) LIMIT 100;""" - ) - - second.query("CREATE DATABASE IF NOT EXISTS db_drop_target_partition;") - second.query( - """CREATE TABLE db_drop_target_partition.destination - ( - Column1 UInt64, - Column2 Int32, - Column3 Date, - Column4 DateTime, - Column5 String - ) ENGINE = MergeTree() - PARTITION BY toYYYYMMDD(Column3) - ORDER BY (Column3, Column2, Column1);""" - ) - - # Insert data in target too. It has to be dropped. - first.query( - """INSERT INTO db_drop_target_partition.destination SELECT * FROM db_drop_target_partition.source;""" - ) - - print("Preparation completed") - - def check(self): - first = cluster.instances["first_of_two"] - second = cluster.instances["second_of_two"] - - a = first.query("SELECT count() from db_drop_target_partition.source") - b = second.query("SELECT count() from db_drop_target_partition.destination") - assert a == b, "Count" - - a = TSV( - first.query( - """SELECT sipHash64(*) from db_drop_target_partition.source - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - b = TSV( - second.query( - """SELECT sipHash64(*) from db_drop_target_partition.destination - ORDER BY (Column1, Column2, Column3, Column4, Column5)""" - ) - ) - assert a == b, "Data" - - first.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - second.query("DROP DATABASE IF EXISTS db_drop_target_partition SYNC") - - -def execute_task(started_cluster, task, cmd_options): - task.start() - - zk = started_cluster.get_kazoo_client("zoo1") - print("Use ZooKeeper server: {}:{}".format(zk.hosts[0][0], zk.hosts[0][1])) - - # Run cluster-copier processes on each node - docker_api = started_cluster.docker_client.api - copiers_exec_ids = [] - - cmd = [ - "/usr/bin/clickhouse", - "copier", - "--config", - "/etc/clickhouse-server/config-copier.xml", - "--task-path", - task.zk_task_path, - "--task-file", - task.container_task_file, - "--task-upload-force", - "true", - "--base-dir", - "/var/log/clickhouse-server/copier", - ] - cmd += cmd_options - - print(cmd) - - for instance_name in started_cluster.instances.keys(): - instance = started_cluster.instances[instance_name] - container = instance.get_docker_handle() - instance.copy_file_to_container( - os.path.join(CURRENT_TEST_DIR, "configs_two_nodes/config-copier.xml"), - "/etc/clickhouse-server/config-copier.xml", - ) - logging.info("Copied copier config to {}".format(instance.name)) - exec_id = docker_api.exec_create(container.id, cmd, stderr=True) - output = docker_api.exec_start(exec_id).decode("utf8") - logging.info(output) - copiers_exec_ids.append(exec_id) - logging.info( - "Copier for {} ({}) has started".format(instance.name, 
instance.ip_address) - ) - - # time.sleep(1000) - - # Wait for copiers stopping and check their return codes - for exec_id, instance in zip( - copiers_exec_ids, iter(started_cluster.instances.values()) - ): - while True: - res = docker_api.exec_inspect(exec_id) - if not res["Running"]: - break - time.sleep(1) - - assert res["ExitCode"] == 0, "Instance: {} ({}). Info: {}".format( - instance.name, instance.ip_address, repr(res) - ) - - try: - task.check() - finally: - zk.delete(task.zk_task_path, recursive=True) diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index ac59b3428e8..628e1432350 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -72,7 +72,7 @@ 8123 HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$' -Checks: '*, - -abseil-*, +Checks: [ + '*', - -altera-*, + '-abseil-*', - -android-*, + '-altera-*', - -bugprone-assignment-in-if-condition, - -bugprone-branch-clone, - -bugprone-easily-swappable-parameters, - -bugprone-exception-escape, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-narrowing-conversions, - -bugprone-not-null-terminated-result, - -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged - -bugprone-unchecked-optional-access, + '-android-*', - -cert-dcl16-c, - -cert-dcl37-c, - -cert-dcl51-cpp, - -cert-err58-cpp, - -cert-msc32-c, - -cert-msc51-cpp, - -cert-oop54-cpp, - -cert-oop57-cpp, + '-bugprone-assignment-in-if-condition', + '-bugprone-branch-clone', + '-bugprone-easily-swappable-parameters', + '-bugprone-exception-escape', + '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-narrowing-conversions', + '-bugprone-not-null-terminated-result', + '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged + '-bugprone-unchecked-optional-access', - -clang-analyzer-unix.Malloc, + '-cert-dcl16-c', + '-cert-dcl37-c', + '-cert-dcl51-cpp', + '-cert-err58-cpp', + '-cert-msc32-c', + '-cert-msc51-cpp', + '-cert-oop54-cpp', + '-cert-oop57-cpp', - -cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow + '-clang-analyzer-unix.Malloc', - -darwin-*, + '-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow - -fuchsia-*, + '-darwin-*', - -google-build-using-namespace, - -google-readability-braces-around-statements, - -google-readability-casting, - -google-readability-function-size, - -google-readability-namespace-comments, - -google-readability-todo, + '-fuchsia-*', - -hicpp-avoid-c-arrays, - -hicpp-avoid-goto, - -hicpp-braces-around-statements, - -hicpp-explicit-conversions, - -hicpp-function-size, - -hicpp-member-init, - -hicpp-move-const-arg, - -hicpp-multiway-paths-covered, - -hicpp-named-parameter, - -hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - -hicpp-signed-bitwise, - -hicpp-special-member-functions, - -hicpp-uppercase-literal-suffix, - -hicpp-use-auto, - -hicpp-use-emplace, - -hicpp-vararg, + '-google-build-using-namespace', + '-google-readability-braces-around-statements', + '-google-readability-casting', + '-google-readability-function-size', + '-google-readability-namespace-comments', + '-google-readability-todo', - -linuxkernel-*, + 
'-hicpp-avoid-c-arrays', + '-hicpp-avoid-goto', + '-hicpp-braces-around-statements', + '-hicpp-explicit-conversions', + '-hicpp-function-size', + '-hicpp-member-init', + '-hicpp-move-const-arg', + '-hicpp-multiway-paths-covered', + '-hicpp-named-parameter', + '-hicpp-no-array-decay', + '-hicpp-no-assembler', + '-hicpp-no-malloc', + '-hicpp-signed-bitwise', + '-hicpp-special-member-functions', + '-hicpp-uppercase-literal-suffix', + '-hicpp-use-auto', + '-hicpp-use-emplace', + '-hicpp-vararg', - -llvm-*, + '-linuxkernel-*', - -llvmlibc-*, + '-llvm-*', - -openmp-*, + '-llvmlibc-*', - -misc-const-correctness, - -misc-include-cleaner, # useful but far too many occurrences - -misc-no-recursion, - -misc-non-private-member-variables-in-classes, - -misc-confusable-identifiers, # useful but slooow - -misc-use-anonymous-namespace, + '-openmp-*', - -modernize-avoid-c-arrays, - -modernize-concat-nested-namespaces, - -modernize-macro-to-enum, - -modernize-pass-by-value, - -modernize-return-braced-init-list, - -modernize-use-auto, - -modernize-use-default-member-init, - -modernize-use-emplace, - -modernize-use-nodiscard, - -modernize-use-override, - -modernize-use-trailing-return-type, + '-misc-const-correctness', + '-misc-include-cleaner', # useful but far too many occurrences + '-misc-no-recursion', + '-misc-non-private-member-variables-in-classes', + '-misc-confusable-identifiers', # useful but slooow + '-misc-use-anonymous-namespace', - -performance-inefficient-string-concatenation, - -performance-no-int-to-ptr, - -performance-avoid-endl, - -performance-unnecessary-value-param, + '-modernize-avoid-c-arrays', + '-modernize-concat-nested-namespaces', + '-modernize-macro-to-enum', + '-modernize-pass-by-value', + '-modernize-return-braced-init-list', + '-modernize-use-auto', + '-modernize-use-default-member-init', + '-modernize-use-emplace', + '-modernize-use-nodiscard', + '-modernize-use-override', + '-modernize-use-trailing-return-type', - -portability-simd-intrinsics, + '-performance-inefficient-string-concatenation', + '-performance-no-int-to-ptr', + '-performance-avoid-endl', + '-performance-unnecessary-value-param', - -readability-avoid-unconditional-preprocessor-if, - -readability-braces-around-statements, - -readability-convert-member-functions-to-static, - -readability-else-after-return, - -readability-function-cognitive-complexity, - -readability-function-size, - -readability-identifier-length, - -readability-identifier-naming, # useful but too slow - -readability-implicit-bool-conversion, - -readability-isolate-declaration, - -readability-magic-numbers, - -readability-named-parameter, - -readability-redundant-declaration, - -readability-simplify-boolean-expr, - -readability-static-accessed-through-instance, - -readability-suspicious-call-argument, - -readability-uppercase-literal-suffix, - -readability-use-anyofallof, + '-portability-simd-intrinsics', - -zircon-*, -' + '-readability-avoid-unconditional-preprocessor-if', + '-readability-braces-around-statements', + '-readability-convert-member-functions-to-static', + '-readability-else-after-return', + '-readability-function-cognitive-complexity', + '-readability-function-size', + '-readability-identifier-length', + '-readability-identifier-naming', # useful but too slow + '-readability-implicit-bool-conversion', + '-readability-isolate-declaration', + '-readability-magic-numbers', + '-readability-named-parameter', + '-readability-redundant-declaration', + '-readability-simplify-boolean-expr', + '-readability-static-accessed-through-instance', +
'-readability-suspicious-call-argument', + '-readability-uppercase-literal-suffix', + '-readability-use-anyofallof', + + '-zircon-*' +] WarningsAsErrors: '*' From e5e84419aff0f559bc545737bfdc0518a732f7ff Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 10 Mar 2024 14:29:18 +0000 Subject: [PATCH 281/985] Fix clang-tidy-s --- contrib/libmetrohash/src/metrohash128.h | 3 +++ src/Access/AccessControl.h | 10 +++++----- src/Access/IAccessStorage.cpp | 2 +- src/Access/IAccessStorage.h | 2 +- src/Common/Arena.h | 4 +--- src/Common/DNSResolver.cpp | 2 +- src/Common/DNSResolver.h | 2 +- src/Common/DateLUTImpl.h | 2 +- src/Common/MultiVersion.h | 4 ++-- src/Common/PODArray.h | 6 +++--- src/Common/SipHash.h | 2 +- src/Common/TransactionID.h | 2 +- src/Common/ZooKeeper/IKeeper.cpp | 8 ++++---- src/Common/ZooKeeper/IKeeper.h | 16 ++++++++-------- src/Common/logger_useful.h | 16 ++++++++-------- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 4 ++-- src/Core/PostgreSQL/insertPostgreSQLValue.h | 4 ++-- src/Core/Settings.h | 2 ++ src/Dictionaries/CacheDictionary.cpp | 4 ++-- src/Dictionaries/CacheDictionary.h | 2 +- .../GeodataProviders/IHierarchiesProvider.h | 2 +- src/Dictionaries/RegExpTreeDictionary.cpp | 2 +- src/Dictionaries/RegExpTreeDictionary.h | 2 +- src/Functions/IFunction.h | 4 ---- src/IO/ReadSettings.h | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.h | 2 +- src/Interpreters/Context.h | 4 ++-- src/Interpreters/IExternalLoadable.h | 2 +- src/Interpreters/ProcessList.h | 2 +- src/Processors/Chunk.h | 2 +- .../Algorithms/AggregatingSortedAlgorithm.cpp | 2 +- src/Processors/Port.h | 2 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp | 4 ++-- src/Processors/TTL/TTLUpdateInfoAlgorithm.h | 4 ++-- src/Storages/StorageInMemoryMetadata.h | 4 ++-- 36 files changed, 69 insertions(+), 69 deletions(-) diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h index 2dbb6ca5a8a..f507c917caf 100644 --- a/contrib/libmetrohash/src/metrohash128.h +++ b/contrib/libmetrohash/src/metrohash128.h @@ -17,6 +17,8 @@ #ifndef METROHASH_METROHASH_128_H #define METROHASH_METROHASH_128_H +// NOLINTBEGIN(readability-avoid-const-params-in-decls) + #include class MetroHash128 @@ -68,5 +70,6 @@ private: void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +// NOLINTEND(readability-avoid-const-params-in-decls) #endif // #ifndef METROHASH_METROHASH_128_H diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 55ea4e4f717..1af74e02fb7 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -133,20 +133,20 @@ public: /// This function also enables custom prefixes to be used. void setCustomSettingsPrefixes(const Strings & prefixes); void setCustomSettingsPrefixes(const String & comma_separated_prefixes); - bool isSettingNameAllowed(const std::string_view name) const; - void checkSettingNameIsAllowed(const std::string_view name) const; + bool isSettingNameAllowed(std::string_view name) const; + void checkSettingNameIsAllowed(std::string_view name) const; /// Allows implicit user creation without password (by default it's allowed). /// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause. 
- void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_); + void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_); bool isImplicitNoPasswordAllowed() const; /// Allows users without password (by default it's allowed). - void setNoPasswordAllowed(const bool allow_no_password_); + void setNoPasswordAllowed(bool allow_no_password_); bool isNoPasswordAllowed() const; /// Allows users with plaintext password (by default it's allowed). - void setPlaintextPasswordAllowed(const bool allow_plaintext_password_); + void setPlaintextPasswordAllowed(bool allow_plaintext_password_); bool isPlaintextPasswordAllowed() const; /// Default password type when the user does not specify it. diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index fbe9e231002..1d6b8d99cd5 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID() } -void IAccessStorage::clearConflictsInEntitiesList(std::vector> & entities, const LoggerPtr log_) +void IAccessStorage::clearConflictsInEntitiesList(std::vector> & entities, LoggerPtr log_) { std::unordered_map positions_by_id; std::unordered_map positions_by_type_and_name[static_cast(AccessEntityType::MAX)]; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index ebb5a39cdf0..ad78bf92e02 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -228,7 +228,7 @@ protected: static UUID generateRandomID(); LoggerPtr getLogger() const; static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); } - static void clearConflictsInEntitiesList(std::vector> & entities, const LoggerPtr log_); + static void clearConflictsInEntitiesList(std::vector> & entities, LoggerPtr log_); [[noreturn]] void throwNotFound(const UUID & id) const; [[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const; [[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type); diff --git a/src/Common/Arena.h b/src/Common/Arena.h index cb26397844b..ba5b9ea9205 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -47,9 +47,7 @@ private: std::unique_ptr prev; - MemoryChunk() - { - } + MemoryChunk() = default; void swap(MemoryChunk & other) { diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index e36e1483da8..4b577a251af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled) impl->disable_cache = is_disabled; } -void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries) +void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries) { impl->cache_address.setMaxSizeInBytes(cache_max_entries); impl->cache_host.setMaxSizeInBytes(cache_max_entries); diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index e3030e51a96..1ddd9d3b991 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -56,7 +56,7 @@ public: void setDisableCacheFlag(bool is_disabled = true); /// Set a limit of entries in cache - void setCacheMaxEntries(const UInt64 cache_max_entries); + void setCacheMaxEntries(UInt64 cache_max_entries); /// Drops all caches void dropCache(); diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 7bf66c0504a..4087e77d588 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ 
-255,7 +255,7 @@ private: static LUTIndex toLUTIndex(ExtendedDayNum d) { - return normalizeLUTIndex(static_cast(d + daynum_offset_epoch)); + return normalizeLUTIndex(static_cast(d) + daynum_offset_epoch); } LUTIndex toLUTIndex(Time t) const diff --git a/src/Common/MultiVersion.h b/src/Common/MultiVersion.h index 8f488f9fcbc..680e224f869 100644 --- a/src/Common/MultiVersion.h +++ b/src/Common/MultiVersion.h @@ -41,9 +41,9 @@ public: } /// There is no copy constructor because only one MultiVersion should own the same object. - MultiVersion(MultiVersion && src) { *this = std::move(src); } + MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT - MultiVersion & operator=(MultiVersion && src) + MultiVersion & operator=(MultiVersion && src) /// NOLINT { if (this != &src) { diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 1a4047a2588..af863e01fb2 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -25,7 +25,7 @@ */ template constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v - || (std::is_integral_v && std::is_integral_v && sizeof(T) == sizeof(U)); + || (std::is_integral_v && std::is_integral_v && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression) namespace DB { @@ -558,7 +558,7 @@ public: } template - void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) + void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap) { #ifndef NDEBUG this->unprotect(); @@ -756,7 +756,7 @@ public: }; template -void swap(PODArray & lhs, PODArray & rhs) +void swap(PODArray & lhs, PODArray & rhs) /// NOLINT { lhs.swap(rhs); } diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 729fb76a573..c89ee2c9d90 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -149,7 +149,7 @@ public: /// Pad the remainder, which is missing up to an 8-byte word. current_word = 0; - switch (end - data) + switch (end - data) /// NOLINT(bugprone-switch-missing-default-case) { case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; diff --git a/src/Common/TransactionID.h b/src/Common/TransactionID.h index 3ab86f7589c..97d0072bc14 100644 --- a/src/Common/TransactionID.h +++ b/src/Common/TransactionID.h @@ -16,7 +16,7 @@ class MergeTreeTransaction; /// or transaction object is not needed and not passed intentionally. 
#ifndef NO_TRANSACTION_PTR #define NO_TRANSACTION_PTR std::shared_ptr(nullptr) -#define NO_TRANSACTION_RAW static_cast(nullptr) +#define NO_TRANSACTION_RAW static_cast(nullptr) /// NOLINT(bugprone-macro-parentheses) #endif /// Commit Sequence Number diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 6c47ea68b84..7d2602bde1e 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -23,7 +23,7 @@ namespace ProfileEvents namespace Coordination { -void Exception::incrementErrorMetrics(const Error code_) +void Exception::incrementErrorMetrics(Error code_) { if (Coordination::isUserError(code_)) ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions); @@ -33,14 +33,14 @@ void Exception::incrementErrorMetrics(const Error code_) ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions); } -Exception::Exception(const std::string & msg, const Error code_, int) +Exception::Exception(const std::string & msg, Error code_, int) : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION) , code(code_) { incrementErrorMetrics(code); } -Exception::Exception(PreformattedMessage && msg, const Error code_) +Exception::Exception(PreformattedMessage && msg, Error code_) : DB::Exception(std::move(msg), DB::ErrorCodes::KEEPER_EXCEPTION) , code(code_) { @@ -48,7 +48,7 @@ Exception::Exception(PreformattedMessage && msg, const Error code_) incrementErrorMetrics(code); } -Exception::Exception(const Error code_) +Exception::Exception(Error code_) : Exception(code_, "Coordination error: {}", errorMessage(code_)) { } diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index c7b902ea03a..ec49c94808e 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -466,13 +466,13 @@ class Exception : public DB::Exception { private: /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution. - Exception(const std::string & msg, const Error code_, int); /// NOLINT - Exception(PreformattedMessage && msg, const Error code_); + Exception(const std::string & msg, Error code_, int); /// NOLINT + Exception(PreformattedMessage && msg, Error code_); /// Message must be a compile-time constant template requires std::is_convertible_v - Exception(T && message, const Error code_) : DB::Exception(std::forward(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_) + Exception(T && message, Error code_) : DB::Exception(std::forward(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_) { incrementErrorMetrics(code); } @@ -480,23 +480,23 @@ private: static void incrementErrorMetrics(Error code_); public: - explicit Exception(const Error code_); /// NOLINT + explicit Exception(Error code_); /// NOLINT Exception(const Exception & exc); template - Exception(const Error code_, FormatStringHelper fmt, Args &&... args) + Exception(Error code_, FormatStringHelper fmt, Args &&... args) : DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::move(fmt), std::forward(args)...) 
, code(code_) { incrementErrorMetrics(code); } - inline static Exception createDeprecated(const std::string & msg, const Error code_) + inline static Exception createDeprecated(const std::string & msg, Error code_) { return Exception(msg, code_, 0); } - inline static Exception fromPath(const Error code_, const std::string & path) + inline static Exception fromPath(Error code_, const std::string & path) { return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path); } @@ -504,7 +504,7 @@ public: /// Message must be a compile-time constant template requires std::is_convertible_v - inline static Exception fromMessage(const Error code_, T && message) + inline static Exception fromMessage(Error code_, T && message) { return Exception(std::forward(message), code_); } diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 8e78e93e198..013b35e695e 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -19,14 +19,14 @@ namespace Poco { class Logger; } using LogSeriesLimiterPtr = std::shared_ptr; -namespace +namespace impl { - [[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; } - [[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); } - [[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } - [[maybe_unused]] std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } - [[maybe_unused]] std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } - [[maybe_unused]] LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; } + [[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; } + [[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); } + [[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } + [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } + [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } + [[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; } } #define LOG_IMPL_FIRST_ARG(X, ...) X @@ -65,7 +65,7 @@ namespace #define LOG_IMPL(logger, priority, PRIORITY, ...) 
do \ { \ - auto _logger = ::getLoggerHelper(logger); \ + auto _logger = ::impl::getLoggerHelper(logger); \ const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \ (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \ if (!_is_clients_log && !_logger->is((PRIORITY))) \ diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index aa60bdee28a..b507b300769 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum void insertPostgreSQLValue( IColumn & column, std::string_view value, - const ExternalResultDescription::ValueType type, const DataTypePtr data_type, + ExternalResultDescription::ValueType type, DataTypePtr data_type, const std::unordered_map & array_info, size_t idx) { switch (type) @@ -170,7 +170,7 @@ void insertPostgreSQLValue( void preparePostgreSQLArrayInfo( - std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type) + std::unordered_map & array_info, size_t column_idx, DataTypePtr data_type) { const auto * array_type = typeid_cast(data_type.get()); auto nested = array_type->getNestedType(); diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.h b/src/Core/PostgreSQL/insertPostgreSQLValue.h index 3bc83292b96..bfb85422aa1 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.h +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.h @@ -22,11 +22,11 @@ struct PostgreSQLArrayInfo void insertPostgreSQLValue( IColumn & column, std::string_view value, - const ExternalResultDescription::ValueType type, const DataTypePtr data_type, + ExternalResultDescription::ValueType type, DataTypePtr data_type, const std::unordered_map & array_info, size_t idx); void preparePostgreSQLArrayInfo( - std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type); + std::unordered_map & array_info, size_t column_idx, DataTypePtr data_type); void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d70a6cf51c5..c41db9d2141 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1192,6 +1192,7 @@ class IColumn; FORMAT_FACTORY_SETTINGS(M, ALIAS) \ OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \ +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) @@ -1236,6 +1237,7 @@ private: /* * User-specified file format settings for File and URL engines. 
*/ +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, LIST_OF_ALL_FORMAT_SETTINGS) struct FormatFactorySettings : public BaseSettings diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 8444042db9e..6e9b09f8919 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -423,7 +423,7 @@ MutableColumns CacheDictionary::aggregateColumnsInOrderOfKe const DictionaryStorageFetchRequest & request, const MutableColumns & fetched_columns, const PaddedPODArray & key_index_to_state, - IColumn::Filter * const default_mask) const + IColumn::Filter * default_mask) const { MutableColumns aggregated_columns = request.makeAttributesResultColumns(); @@ -473,7 +473,7 @@ MutableColumns CacheDictionary::aggregateColumns( const PaddedPODArray & key_index_to_fetched_columns_from_storage_result, const MutableColumns & fetched_columns_during_update, const HashMap & found_keys_to_fetched_columns_during_update_index, - IColumn::Filter * const default_mask) const + IColumn::Filter * default_mask) const { /** * Aggregation of columns fetched from storage and from source during update. diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h index 8897fb40fa9..c02fb91c60e 100644 --- a/src/Dictionaries/CacheDictionary.h +++ b/src/Dictionaries/CacheDictionary.h @@ -162,7 +162,7 @@ private: const DictionaryStorageFetchRequest & request, const MutableColumns & fetched_columns, const PaddedPODArray & key_index_to_state, - IColumn::Filter * const default_mask = nullptr) const; + IColumn::Filter * default_mask = nullptr) const; MutableColumns aggregateColumns( const PaddedPODArray & keys, diff --git a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h index 68ab0fdca2d..a4b88127786 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h @@ -14,7 +14,7 @@ class IRegionsHierarchyReader public: virtual bool readNext(RegionEntry & entry) = 0; - virtual ~IRegionsHierarchyReader() {} + virtual ~IRegionsHierarchyReader() = default; }; using IRegionsHierarchyReaderPtr = std::unique_ptr; diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 4d82aa9ca0e..1f5c2d6d2c7 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -568,7 +568,7 @@ bool RegExpTreeDictionary::setAttributesShortCircuit( const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, - std::unordered_set * const defaults) const + std::unordered_set * defaults) const { if (visited_nodes.contains(id)) return attributes_to_set.attributesFull() == attributes.size(); diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 9e14abb49d0..d6bc90ef651 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -210,7 +210,7 @@ private: const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, - std::unordered_set * const defaults) const; + std::unordered_set * defaults) const; struct RegexTreeNode; using RegexTreeNodePtr = std::shared_ptr; diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 05aa08e2ad7..9b7cdf12d57 100644 --- a/src/Functions/IFunction.h +++ 
b/src/Functions/IFunction.h @@ -13,10 +13,6 @@ #include -#if USE_EMBEDDED_COMPILER -# include -#endif - /// This file contains user interface for functions. namespace llvm diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 38904df4403..5c401c0c8d9 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -63,6 +63,7 @@ enum class RemoteFSReadMethod class MMappedFileCache; class PageCache; +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) struct ReadSettings { /// Method to use reading from local filesystem. diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 7d56dbabe3c..9327f31b6ff 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -905,7 +905,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, - const LoggerPtr logger, + LoggerPtr logger, LogFunc && add_to_async_insert_log) { size_t total_rows = 0; diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index f60b3d343fb..5076701d0b0 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -265,7 +265,7 @@ private: const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, - const LoggerPtr logger, + LoggerPtr logger, LogFunc && add_to_async_insert_log); template diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index c8aa3604a6f..43df8d6adf2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -330,7 +330,7 @@ protected: return *this; } - void swap(QueryAccessInfo & rhs) + void swap(QueryAccessInfo & rhs) noexcept { std::swap(databases, rhs.databases); std::swap(tables, rhs.tables); @@ -680,7 +680,7 @@ public: void addSpecialScalar(const String & name, const Block & block); const QueryAccessInfo & getQueryAccessInfo() const { return *getQueryAccessInfoPtr(); } - const QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; } + QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; } void setQueryAccessInfo(QueryAccessInfoPtr other) { query_access_info = other; } void addQueryAccessInfo( diff --git a/src/Interpreters/IExternalLoadable.h b/src/Interpreters/IExternalLoadable.h index 3c004508b0a..47031778876 100644 --- a/src/Interpreters/IExternalLoadable.h +++ b/src/Interpreters/IExternalLoadable.h @@ -23,7 +23,7 @@ struct ExternalLoadableLifetime UInt64 max_sec = 0; ExternalLoadableLifetime(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); - ExternalLoadableLifetime() {} + ExternalLoadableLifetime() = default; }; /// Get delay before trying to load again after error. 
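A large share of the edits in this commit are mechanical: top-level `const` is dropped from by-value parameters in declarations (the pattern behind `readability-avoid-const-params-in-decls`, the same check suppressed with `NOLINTBEGIN`/`NOLINTEND` in `metrohash128.h` above), and empty special-member bodies become `= default`. A minimal, self-contained sketch of both shapes — the names below are invented for illustration and are not taken from the ClickHouse sources:

```cpp
#include <cstddef>

// Top-level const on a by-value parameter is not part of the function type,
// so in a declaration it is pure noise for callers.
// Before: void setMaxEntries(const std::size_t max_entries);
void setMaxEntries(std::size_t max_entries);

// The definition may still qualify the parameter if the implementation
// prefers it; this definition and the declaration refer to the same function.
void setMaxEntries(const std::size_t max_entries)
{
    (void)max_entries; // placeholder body
}

struct Widget
{
    // Before: Widget() {}
    Widget() = default; // same meaning, lets the compiler generate it
};
```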
diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 1c253f562e8..ad47041c762 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -318,7 +318,7 @@ public: ~ProcessListEntry(); QueryStatusPtr getQueryStatus() { return *it; } - const QueryStatusPtr getQueryStatus() const { return *it; } + QueryStatusPtr getQueryStatus() const { return *it; } }; diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 9a7d6bc294d..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -59,7 +59,7 @@ public: Chunk clone() const; - void swap(Chunk & other) + void swap(Chunk & other) noexcept { columns.swap(other.columns); chunk_info.swap(other.chunk_info); diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index d2d2434c477..3bd0b532d90 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -126,7 +126,7 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co AggregatingSortedAlgorithm::SimpleAggregateDescription::SimpleAggregateDescription( - AggregateFunctionPtr function_, const size_t column_number_, + AggregateFunctionPtr function_, size_t column_number_, DataTypePtr nested_type_, DataTypePtr real_type_) : function(std::move(function_)), column_number(column_number_) , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 67af2f041aa..f3c7bbb5fee 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -110,7 +110,7 @@ protected: return result; } - uintptr_t ALWAYS_INLINE swap(std::atomic & value, std::uintptr_t flags, std::uintptr_t mask) + uintptr_t ALWAYS_INLINE swap(std::atomic & value, std::uintptr_t flags, std::uintptr_t mask) /// NOLINT { Data * expected = nullptr; Data * desired = getPtr(flags | getUInt(data)); diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index b7cddf3c165..13d3030bbb8 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -6,8 +6,8 @@ namespace DB TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( const TTLExpressions & ttl_expressions_, const TTLDescription & description_, - const TTLUpdateField ttl_update_field_, - const String ttl_update_key_, + TTLUpdateField ttl_update_field_, + String ttl_update_key_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h index 0cf31765aef..b6aee6f7cb0 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -22,8 +22,8 @@ public: TTLUpdateInfoAlgorithm( const TTLExpressions & ttl_expressions_, const TTLDescription & description_, - const TTLUpdateField ttl_update_field_, - const String ttl_update_key_, + TTLUpdateField ttl_update_field_, + String ttl_update_key_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_ ); diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 2823aba1224..69cd3422a7d 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -72,8 +72,8 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); 
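Two smaller themes show up in the hunks just above: member `swap` functions gain `noexcept` (what `performance-noexcept-swap` asks for — the same check already silenced on `PODArray::swap` with a `NOLINT`), and accessors stop returning `const`-qualified values by value, which only prevents the caller from moving the result. A hedged sketch of the `swap` half, with an invented type:

```cpp
#include <vector>

struct Buffer
{
    std::vector<char> data;

    // A swap that cannot throw should say so: generic code broadly assumes
    // swapping is cheap and non-throwing, and spelling out noexcept makes
    // that guarantee explicit and checkable.
    void swap(Buffer & other) noexcept
    {
        data.swap(other.data); // std::vector::swap does not throw here
    }
};
```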
StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); - StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; - StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; + StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; /// NOLINT + StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; /// NOLINT /// NOTE: Thread unsafe part. You should not modify same StorageInMemoryMetadata /// structure from different threads. It should be used as MultiVersion From e562d97ff59739da3ca3650a33644bf9700becd7 Mon Sep 17 00:00:00 2001 From: Nikolay Yankin <211292+kolya7k@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:07:19 +0300 Subject: [PATCH 282/985] Update install.md https://packages.clickhouse.com/tgz/stable/ is multi-paged now and sorted by date asc so we can't get very last version --- docs/en/getting-started/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 234420de374..3b01434ecc5 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -262,7 +262,7 @@ The required version can be downloaded with `curl` or `wget` from repository htt After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ +LATEST_VERSION=$(curl -s -L https://api.github.com/repos/ClickHouse/ClickHouse/tags | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) export LATEST_VERSION From 90b27432a26c0a5204e09ff5ff5f2ae8df3055af Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 11 Mar 2024 12:18:58 +0100 Subject: [PATCH 283/985] Update test.py --- tests/integration/test_backup_restore_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 452a9143067..f3f4837c317 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -130,7 +130,7 @@ def check_system_tables(backup_query_id=None): if disk ] expected_disks = ( - ("default", "local", "", ""), + ("default", "local", "None", "None"), ("disk_s3", "object_storage", "s3", "local"), ("disk_s3_cache", "object_storage", "s3", "local"), ("disk_s3_other_bucket", "object_storage", "s3", "local"), From 57f6263f67dd91e624003199295c840a228947a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 12:31:40 +0100 Subject: [PATCH 284/985] Lock contention fix --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 12 +++++++++--- src/Interpreters/Cache/FileCache.h | 2 +- src/Interpreters/Cache/FileCache_fwd.h | 1 + src/Interpreters/Cache/Guards.h | 15 ++++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c1ac3d08245..ab1a16a3edf 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -476,6 +476,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentRemoveMicroseconds, "File segment remove() time") \ M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not 
increased due to a high contention on the cache lock") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 9c705ddc27c..5650b9ce44e 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -27,6 +27,7 @@ namespace ProfileEvents extern const Event FilesystemCacheReserveMicroseconds; extern const Event FilesystemCacheGetOrSetMicroseconds; extern const Event FilesystemCacheGetMicroseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; } namespace DB @@ -188,9 +189,9 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } -CacheGuard::Lock FileCache::tryLockCache() const +CacheGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const { - return cache_guard.tryLock(); + return acquire_timeout.has_value() ? cache_guard.tryLockFor(acquire_timeout.value()) : cache_guard.tryLock(); } FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const @@ -781,7 +782,12 @@ bool FileCache::tryReserve( ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); assertInitialized(); - auto cache_lock = lockCache(); + auto cache_lock = tryLockCache(std::chrono::milliseconds(FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS)); + if (!cache_lock) + { + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + return false; + } LOG_TEST( log, "Trying to reserve space ({} bytes) for {}:{}, current usage {}/{}", diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 5b665ad0271..7434b2ac78a 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,7 +173,7 @@ public: void deactivateBackgroundOperations(); CacheGuard::Lock lockCache() const; - CacheGuard::Lock tryLockCache() const; + CacheGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; std::vector sync(); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 06261b19db7..eaed279e7fd 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -12,6 +12,7 @@ static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; +static constexpr size_t FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS = 1000; /// 1 sec. 
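The reservation path above now takes the cache lock with a bounded wait and treats a timeout as "space not reserved" (counting `FilesystemCacheFailToReserveSpaceBecauseOfLockContention`) rather than queueing behind a contended mutex. A compressed, standalone sketch of that pattern — this is not the actual `FileCache` code, and only the 1-second budget is mirrored from the constant above:

```cpp
#include <chrono>
#include <mutex>

std::timed_mutex cache_mutex;

bool tryReserveSketch()
{
    // Attempt the lock for at most ~1 second; this unique_lock constructor
    // calls try_lock_for() instead of blocking indefinitely.
    std::unique_lock<std::timed_mutex> lock(cache_mutex, std::chrono::milliseconds(1000));

    if (!lock.owns_lock())
        return false; // high contention: skip reservation, let the caller fall back

    // ... eviction / reservation work that requires the cache lock ...
    return true;
}
```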
class FileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 5729620d82f..0ac7cb80483 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -61,17 +61,26 @@ namespace DB */ struct CacheGuard : private boost::noncopyable { + using Mutex = std::timed_mutex; /// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example - struct Lock : public std::unique_lock + struct Lock : public std::unique_lock { - using Base = std::unique_lock; + using Base = std::unique_lock; using Base::Base; }; Lock lock() { return Lock(mutex); } + Lock tryLock() { return Lock(mutex, std::try_to_lock); } - std::mutex mutex; + + Lock tryLockFor(const std::chrono::milliseconds & acquire_timeout) + { + return Lock(mutex, std::chrono::duration(acquire_timeout)); + } + +private: + Mutex mutex; }; /** From c0689f3760c738dc1f73cf58c1c9de12b4c096a7 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 7 Mar 2024 11:24:34 +0000 Subject: [PATCH 285/985] Fix ASTRenameQuery::clone --- src/Parsers/ASTRenameQuery.h | 1 + src/Parsers/tests/gtest_Parser.cpp | 38 ++++++++++++++++++++++++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index 73d12be094a..582060ab34a 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -60,6 +60,7 @@ public: ASTPtr clone() const override { auto res = std::make_shared(*this); + res->cloneChildren(); cloneOutputOptions(*res); return res; } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 36d2deae8d7..19947cd38cc 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -62,10 +63,29 @@ TEST_P(ParserTest, parseQuery) if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { - WriteBufferFromOwnString buf; - formatAST(*ast->clone(), buf, false, false); - String formatted_ast = buf.str(); - EXPECT_EQ(expected_ast, formatted_ast); + ASTPtr ast_clone = ast->clone(); + { + WriteBufferFromOwnString buf; + formatAST(*ast_clone, buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); + } + + + ASTPtr ast_clone2 = ast_clone->clone(); + /// Break `ast_clone2`, it should not affect `ast_clone` if `clone()` implemented properly + for (auto & child : ast_clone2->children) + { + if (auto * identifier = dynamic_cast(child.get())) + identifier->setShortName("new_name"); + } + + { + WriteBufferFromOwnString buf; + formatAST(*ast_clone, buf, false, false); + String formatted_ast = buf.str(); + EXPECT_EQ(expected_ast, formatted_ast); + } } else { @@ -299,6 +319,16 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, } }))); +INSTANTIATE_TEST_SUITE_P(ParserRenameQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "RENAME TABLE eligible_test TO eligible_test2", + "RENAME TABLE eligible_test TO eligible_test2" + } +}))); + INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserKQLTest, ::testing::Combine( ::testing::Values(std::make_shared()), From 38b8e7ac7e87b0efcea7560735697de0583e0f17 Mon Sep 17 00:00:00 
2001 From: vdimir Date: Fri, 8 Mar 2024 11:54:20 +0000 Subject: [PATCH 286/985] Maintain list of ASTRenameQuery children --- src/Interpreters/AddDefaultDatabaseVisitor.h | 8 +--- src/Interpreters/InterpreterCreateQuery.cpp | 3 +- src/Interpreters/InterpreterRenameQuery.cpp | 8 ++-- .../MySQL/InterpretersMySQLDDLQuery.cpp | 25 ++++++------ src/Interpreters/SystemLog.cpp | 3 +- src/Parsers/ASTRenameQuery.h | 38 ++++++++++++++++++- src/Parsers/ParserRenameQuery.cpp | 26 ++++--------- src/Storages/StorageMaterializedView.cpp | 5 ++- 8 files changed, 69 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index b977a73d461..e36f22e8ba1 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -275,13 +275,7 @@ private: if (only_replace_current_database_function) return; - for (ASTRenameQuery::Element & elem : node.elements) - { - if (!elem.from.database) - elem.from.database = std::make_shared(database_name); - if (!elem.to.database) - elem.to.database = std::make_shared(database_name); - } + node.setDatabaseIfNotExists(database_name); } void visitDDL(ASTAlterQuery & node, ASTPtr &) const diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d05d8b8deb1..edd7452c130 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1612,7 +1612,6 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, executeTrivialBlockIO(fill_io, getContext()); /// Replace target table with created one - auto ast_rename = std::make_shared(); ASTRenameQuery::Element elem { ASTRenameQuery::Table @@ -1627,7 +1626,7 @@ BlockIO InterpreterCreateQuery::doCreateOrReplaceTable(ASTCreateQuery & create, } }; - ast_rename->elements.push_back(std::move(elem)); + auto ast_rename = std::make_shared(ASTRenameQuery::Elements{std::move(elem)}); ast_rename->dictionary = create.is_dictionary; if (create.create_or_replace) { diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 52001fdcaf4..06b6ebc9cbb 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -47,12 +47,12 @@ BlockIO InterpreterRenameQuery::execute() */ RenameDescriptions descriptions; - descriptions.reserve(rename.elements.size()); + descriptions.reserve(rename.getElements().size()); /// Don't allow to drop tables (that we are renaming); don't allow to create tables in places where tables will be renamed. 
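Both of these ASTRenameQuery commits enforce the same invariant: an AST node that owns child nodes must register them in `children` and deep-copy them on `clone()`, so a clone can be mutated without touching the original — exactly what the new gtest checks by renaming one clone's identifiers and re-formatting the other. A stripped-down sketch of the deep-copy half, using an invented node type rather than the real `IAST`:

```cpp
#include <memory>
#include <vector>

struct Node
{
    std::vector<std::shared_ptr<Node>> children;

    std::shared_ptr<Node> clone() const
    {
        // Copying *this copies the pointers, so both objects would still
        // share the same child nodes ...
        auto res = std::make_shared<Node>(*this);

        // ... until the children are cloned too (the role cloneChildren()
        // plays in the real code). Without this loop, editing a child of
        // one clone silently edits the other.
        for (auto & child : res->children)
            child = child->clone();

        return res;
    }
};
```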
TableGuards table_guards; - for (const auto & elem : rename.elements) + for (const auto & elem : rename.getElements()) { descriptions.emplace_back(elem, current_database); const auto & description = descriptions.back(); @@ -186,7 +186,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename { AccessRightsElements required_access; const auto & rename = query_ptr->as(); - for (const auto & elem : rename.elements) + for (const auto & elem : rename.getElements()) { if (type == RenameType::RenameTable) { @@ -214,7 +214,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename void InterpreterRenameQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & ast, ContextPtr) const { const auto & rename = ast->as(); - for (const auto & element : rename.elements) + for (const auto & element : rename.getElements()) { { String database = backQuoteIfNeed(!element.from.database ? getContext()->getCurrentDatabase() : element.from.getDatabase()); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 107b435ded4..fd7ffca2872 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -579,7 +579,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( const InterpreterRenameImpl::TQuery & rename_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database) { ASTRenameQuery::Elements elements; - for (const auto & rename_element : rename_query.elements) + for (const auto & rename_element : rename_query.getElements()) { const auto & to_database = resolveDatabase(rename_element.to.getDatabase(), mysql_database, mapped_to_database, context); const auto & from_database = resolveDatabase(rename_element.from.getDatabase(), mysql_database, mapped_to_database, context); @@ -600,8 +600,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( if (elements.empty()) return ASTs{}; - auto rewritten_query = std::make_shared(); - rewritten_query->elements = elements; + auto rewritten_query = std::make_shared(std::move(elements)); return ASTs{rewritten_query}; } @@ -616,7 +615,8 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( return {}; auto rewritten_alter_query = std::make_shared(); - auto rewritten_rename_query = std::make_shared(); + ASTRenameQuery::Elements rename_elements; + rewritten_alter_query->setDatabase(mapped_to_database); rewritten_alter_query->setTable(alter_query.table); rewritten_alter_query->alter_object = ASTAlterQuery::AlterObjectType::TABLE; @@ -749,13 +749,13 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( /// For ALTER TABLE table_name RENAME TO new_table_name_1, RENAME TO new_table_name_2; /// We just need to generate RENAME TABLE table_name TO new_table_name_2; - if (rewritten_rename_query->elements.empty()) - rewritten_rename_query->elements.push_back(ASTRenameQuery::Element()); + if (rename_elements.empty()) + rename_elements.push_back(ASTRenameQuery::Element()); - rewritten_rename_query->elements.back().from.database = std::make_shared(mapped_to_database); - rewritten_rename_query->elements.back().from.table = std::make_shared(alter_query.table); - rewritten_rename_query->elements.back().to.database = std::make_shared(mapped_to_database); - rewritten_rename_query->elements.back().to.table = std::make_shared(alter_command->new_table_name); + rename_elements.back().from.database = std::make_shared(mapped_to_database); + rename_elements.back().from.table = 
std::make_shared(alter_query.table); + rename_elements.back().to.database = std::make_shared(mapped_to_database); + rename_elements.back().to.table = std::make_shared(alter_command->new_table_name); } } @@ -765,8 +765,11 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( if (!rewritten_alter_query->command_list->children.empty()) rewritten_queries.push_back(rewritten_alter_query); - if (!rewritten_rename_query->elements.empty()) + if (!rename_elements.empty()) + { + auto rewritten_rename_query = std::make_shared(std::move(rename_elements)); rewritten_queries.push_back(rewritten_rename_query); + } return rewritten_queries; } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 6580dc3e9b7..a74b5c67726 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -563,7 +563,6 @@ void SystemLog::prepareTable() {table_id.database_name, table_id.table_name + "_" + toString(suffix)}, getContext())) ++suffix; - auto rename = std::make_shared(); ASTRenameQuery::Element elem { ASTRenameQuery::Table @@ -586,7 +585,7 @@ void SystemLog::prepareTable() old_create_query, create_query); - rename->elements.emplace_back(std::move(elem)); + auto rename = std::make_shared(ASTRenameQuery::Elements{std::move(elem)}); ActionLock merges_lock; if (DatabaseCatalog::instance().getDatabase(table_id.database_name)->getUUID() == UUIDHelpers::Nil) diff --git a/src/Parsers/ASTRenameQuery.h b/src/Parsers/ASTRenameQuery.h index 582060ab34a..beaf93c4761 100644 --- a/src/Parsers/ASTRenameQuery.h +++ b/src/Parsers/ASTRenameQuery.h @@ -45,7 +45,6 @@ public: }; using Elements = std::vector; - Elements elements; bool exchange{false}; /// For EXCHANGE TABLES bool database{false}; /// For RENAME DATABASE @@ -54,6 +53,41 @@ public: /// Special flag for CREATE OR REPLACE. Do not throw if the second table does not exist. bool rename_if_cannot_exchange{false}; + explicit ASTRenameQuery(Elements elements_ = {}) + : elements(std::move(elements_)) + { + for (const auto & elem : elements) + { + if (elem.from.database) + children.push_back(elem.from.database); + if (elem.from.table) + children.push_back(elem.from.table); + if (elem.to.database) + children.push_back(elem.to.database); + if (elem.to.table) + children.push_back(elem.to.table); + } + } + + void setDatabaseIfNotExists(const String & database_name) + { + for (auto & elem : elements) + { + if (!elem.from.database) + { + elem.from.database = std::make_shared(database_name); + children.push_back(elem.from.database); + } + if (!elem.to.database) + { + elem.to.database = std::make_shared(database_name); + children.push_back(elem.to.database); + } + } + } + + const Elements & getElements() const { return elements; } + /** Get the text that identifies this element. 
*/ String getID(char) const override { return "Rename"; } @@ -146,6 +180,8 @@ protected: formatOnCluster(settings); } + + Elements elements; }; } diff --git a/src/Parsers/ParserRenameQuery.cpp b/src/Parsers/ParserRenameQuery.cpp index 27f2ed1cd22..f9d29108ed6 100644 --- a/src/Parsers/ParserRenameQuery.cpp +++ b/src/Parsers/ParserRenameQuery.cpp @@ -44,15 +44,14 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) return false; } + ASTRenameQuery::Elements rename_elements; + rename_elements.emplace_back(); + rename_elements.back().if_exists = if_exists; + rename_elements.back().from.database = from_db; + rename_elements.back().to.database = to_db; - auto query = std::make_shared(); + auto query = std::make_shared(std::move(rename_elements)); query->database = true; - query->elements.emplace({}); - query->elements.front().if_exists = if_exists; - query->elements.front().from.database = from_db; - query->elements.front().to.database = to_db; - query->children.push_back(std::move(from_db)); - query->children.push_back(std::move(to_db)); query->cluster = cluster_str; node = query; return true; @@ -75,9 +74,8 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const auto ignore_delim = [&] { return exchange ? s_and.ignore(pos) : s_to.ignore(pos); }; - auto query = std::make_shared(); - ASTRenameQuery::Elements & elements = query->elements; + ASTRenameQuery::Elements elements; while (true) { @@ -93,15 +91,6 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) || !ignore_delim() || !parseDatabaseAndTableAsAST(pos, expected, ref.to.database, ref.to.table)) return false; - - if (ref.from.database) - query->children.push_back(ref.from.database); - if (ref.from.table) - query->children.push_back(ref.from.table); - if (ref.to.database) - query->children.push_back(ref.to.database); - if (ref.to.table) - query->children.push_back(ref.to.table); } String cluster_str; @@ -111,6 +100,7 @@ bool ParserRenameQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; } + auto query = std::make_shared(std::move(elements)); query->cluster = cluster_str; query->exchange = exchange; query->dictionary = dictionary; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 02cba1cf753..9958d65819b 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -465,8 +465,8 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) if (!from_atomic_to_atomic_database && has_inner_table && tryGetTargetTable()) { auto new_target_table_name = generateInnerTableName(new_table_id); - auto rename = std::make_shared(); + ASTRenameQuery::Elements rename_elements; assert(inner_table_id.database_name == old_table_id.database_name); ASTRenameQuery::Element elem @@ -482,8 +482,9 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) std::make_shared(new_target_table_name) } }; - rename->elements.emplace_back(std::move(elem)); + rename_elements.emplace_back(std::move(elem)); + auto rename = std::make_shared(std::move(rename_elements)); InterpreterRenameQuery(rename, getContext()).execute(); updateTargetTableId(new_table_id.database_name, new_target_table_name); } From 59c6311ead26e48f861e27d19d58deffe4c6d622 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 11 Mar 2024 09:55:13 +0000 Subject: [PATCH 287/985] improve report 
#do_not_test --- utils/check-style/process_style_check_result.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/utils/check-style/process_style_check_result.py b/utils/check-style/process_style_check_result.py index e620d85b9d0..b043aa548d7 100755 --- a/utils/check-style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -32,8 +32,13 @@ def process_result(result_folder): if not os.path.exists(full_path): test_results.append((f"Check {name}", "SKIPPED")) elif os.stat(full_path).st_size != 0: + with open(full_path, 'r') as file: + lines = file.readlines() + if len(lines) > 100: + lines = lines[:100] + ['====TRIMMED===='] + content = "\n".join(lines) description += f"Check {name} failed. " - test_results.append((f"Check {name}", "FAIL")) + test_results.append((f"Check {name}", "FAIL", None, content)) status = "failure" else: test_results.append((f"Check {name}", "OK")) From 8f2ff2ccd833dd1cc839922c59a90360f2b882c7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 11 Mar 2024 13:07:39 +0100 Subject: [PATCH 288/985] Enable black back --- tests/ci/style_check.py | 18 +++++++++--------- utils/check-style/check_py.sh | 4 ++-- .../check-style/process_style_check_result.py | 10 +++++----- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9f113b6e6f9..7fb5d22a3ce 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 import argparse -from concurrent.futures import ProcessPoolExecutor import csv import logging import os import shutil import subprocess import sys +from concurrent.futures import ProcessPoolExecutor from pathlib import Path from typing import List, Tuple @@ -121,7 +121,7 @@ def checkout_last_ref(pr_info: PRInfo) -> None: def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) - # args = parse_args() + args = parse_args() stopwatch = Stopwatch() @@ -141,12 +141,14 @@ def main(): f"--entrypoint= -w/ClickHouse/utils/check-style " f"{image} ./check_cpp.sh" ) + cmd_py = ( f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " f"--entrypoint= -w/ClickHouse/utils/check-style " f"{image} ./check_py.sh" ) + cmd_docs = ( f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE " f"--volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output " @@ -175,12 +177,14 @@ def main(): _ = future1.result() if run_pycheck: + if args.push: + checkout_head(pr_info) logging.info("Run py files check: %s", cmd_py) future2 = executor.submit(subprocess.run, cmd_py, shell=True) _ = future2.result() - - # if args.push: - # checkout_head(pr_info) + if args.push: + commit_push_staged(pr_info) + checkout_last_ref(pr_info) subprocess.check_call( f"python3 ../../utils/check-style/process_style_check_result.py --in-results-dir {temp_path} " @@ -189,10 +193,6 @@ def main(): shell=True, ) - # if args.push: - # commit_push_staged(pr_info) - # checkout_last_ref(pr_info) - state, description, test_results, additional_files = process_result(temp_path) JobReport( diff --git a/utils/check-style/check_py.sh b/utils/check-style/check_py.sh index b729cd78124..13f4e754ed3 100755 --- a/utils/check-style/check_py.sh +++ b/utils/check-style/check_py.sh @@ -5,8 +5,8 @@ cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_ou start_total=`date +%s` # 
FIXME: 1 min to wait + head checkout -# echo "Check python formatting with black" | ts -# ./check-black -n |& tee /test_output/black_output.txt +echo "Check python formatting with black" | ts +./check-black -n |& tee /test_output/black_output.txt start=`date +%s` ./check-pylint -n |& tee /test_output/pylint_output.txt diff --git a/utils/check-style/process_style_check_result.py b/utils/check-style/process_style_check_result.py index b043aa548d7..8c9837b4fc6 100755 --- a/utils/check-style/process_style_check_result.py +++ b/utils/check-style/process_style_check_result.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 -import os -import logging import argparse import csv +import logging +import os # TODO: add typing and log files to the fourth column, think about launching @@ -17,7 +17,7 @@ def process_result(result_folder): # "shellcheck", "style", "pylint", - # "black", + "black", "mypy", "typos", "whitespaces", @@ -32,10 +32,10 @@ def process_result(result_folder): if not os.path.exists(full_path): test_results.append((f"Check {name}", "SKIPPED")) elif os.stat(full_path).st_size != 0: - with open(full_path, 'r') as file: + with open(full_path, "r") as file: lines = file.readlines() if len(lines) > 100: - lines = lines[:100] + ['====TRIMMED===='] + lines = lines[:100] + ["====TRIMMED===="] content = "\n".join(lines) description += f"Check {name} failed. " test_results.append((f"Check {name}", "FAIL", None, content)) From 1ea68265b50a8c8ded253e392181b3b81df72815 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:46:30 +0000 Subject: [PATCH 289/985] Revert "Adds makeDateTime64 function." This reverts commit bd194aab41401492c5d628269df53e68243a1211. --- .../functions/date-time-functions.md | 137 ++++-------------- .../functions/other-functions.md | 65 +-------- 2 files changed, 32 insertions(+), 170 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 12f0c996ce7..41503abfa2f 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -26,115 +26,66 @@ SELECT ## makeDate -Creates a [Date](../../sql-reference/data-types/date.md) from either one of the following sets of arguments: +Creates a [Date](../../sql-reference/data-types/date.md) +- from a year, month and day argument, or +- from a year and day of year argument. -- a year, month, and day. -- a year and day of year. +**Syntax** -### Syntax - -Using a year, month, and day: - -```sql -makeDate(year, month, day) +``` sql +makeDate(year, month, day); +makeDate(year, day_of_year); ``` -Using a year and day of year: +Alias: +- `MAKEDATE(year, month, day);` +- `MAKEDATE(year, day_of_year);` -```sql -makeDate(year, day_of_year) -``` - -### Arguments +**Arguments** - `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - `day_of_year` — Day of the year. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -### Returned values +**Returned value** -A date created from the arguments. +- A date created from the arguments. Type: [Date](../../sql-reference/data-types/date.md). -### Examples +**Example** Create a Date from a year, month and day: -```sql +``` sql SELECT makeDate(2023, 2, 28) AS Date; ``` -```response -2023-02-28 +Result: + +``` text +┌───────date─┐ +│ 2023-02-28 │ +└────────────┘ ``` -Create a Date from a year and day of year: +Create a Date from a year and day of year argument: ``` sql SELECT makeDate(2023, 42) AS Date; ``` -```response -2023-02-11 -``` +Result: +``` text +┌───────date─┐ +│ 2023-02-11 │ +└────────────┘ +``` ## makeDate32 -Creates a date of type [Date32](../../sql-reference/data-types/date32.md) from either one of the following sets of arguments: - -- a year, month, and day. -- a year and day of year. - -### Syntax - -Using a year, month, and day: - -```sql -makeDate32(year, month, day) -``` - -Using a year and day of year: - -```sql -makeDate32(year, day_of_year) -``` - -### Arguments - -- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). -- `day_of_year` — Day of the year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). - -### Returned values - -A date created from the arguments. - -Type: [Date32](../../sql-reference/data-types/date32.md). - -### Examples - -Create a date from a year, month, and day: - -```sql -SELECT makeDate32(2024, 1, 1); -``` - -```response -2024-01-01 -``` - -Create a Date from a year and day of year: - -``` sql -SELECT makeDate32(2024, 100); -``` - -```response -2024-04-09 -``` +Like [makeDate](#makeDate) but produces a [Date32](../../sql-reference/data-types/date32.md). ## makeDateTime @@ -178,38 +129,12 @@ Result: ## makeDateTime64 -Create a [DateTime64](../../sql-reference/data-types/datetime64.md) data type value from its components (year, month, day, hour, minute, second, and optionally, subsecond precision). - -The DateTime64 data type stores both the date and time components in a single 64-bit integer value. The precision of the time component is configurable, allowing you to store time values with subsecond precision up to nanoseconds. +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). **Syntax** -```sql -makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision]]) -``` - -**Arguments** - -- `year` — [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The year component (0-9999). -- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The month component (1-12). -- `day` — Day. 
[Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The day component (1-31). -- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The hour component (0-23). -- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The minute component (0-59). -- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). The second component (0-59). -- `subsecond_precision` (optional) [Integer](../../sql-reference/data-types/int-uint.md): The precision of the subsecond component (0-9, where 0 means no subsecond precision, and 9 means nanosecond precision). - -**Returned value** - -A date and time element of type [DateTime64](../../sql-reference/data-types/datetime64.md) with created from the supplied arguments. - -**Example** - ``` sql -SELECT makeDateTime64(2023, 5, 15, 10, 30, 45, 779, 5); -``` - -```response -2023-05-15 10:30:45.00779 +makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) ``` ## timestamp diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 10ceedad9aa..739b688a0d2 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -1866,7 +1866,7 @@ As you can see, `runningAccumulate` merges states for each group of rows separat ## joinGet -Allows you to extract data from a specific column in a Join table, similar to how you would access a value from a dictionary. +The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. @@ -1927,69 +1927,6 @@ Result: └──────────────────────────────────────────────────┘ ``` -## joinGetOrNull - -Allows you to extract data from a specific column in a Join table, similar to how you would access a value from a dictionary. - -Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. - -Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` statement. - -### Syntax - -```sql -joinGet(join_storage_table_name, `value_column`, join_keys) -``` - -### Parameters - -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. -- `value_column` — name of the column of the table that contains required data. -- `join_keys` — list of keys. - -### Returned value - -Returns a list of values corresponded to list of keys. - -If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. 
- -More info about `join_use_nulls` in [Join operation](../../engines/table-engines/special/join.md). - -**Example** - -Input table: - -``` sql -CREATE DATABASE db_test -CREATE TABLE db_test.id_val(`id` UInt32, `val` UInt32) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1 -INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) -``` - -``` text -┌─id─┬─val─┐ -│ 4 │ 13 │ -│ 2 │ 12 │ -│ 1 │ 11 │ -└────┴─────┘ -``` - -Query: - -``` sql -SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 -``` - -Result: - -``` text -┌─joinGet(db_test.id_val, 'val', toUInt32(number))─┐ -│ 0 │ -│ 11 │ -│ 12 │ -│ 0 │ -└──────────────────────────────────────────────────┘ -``` - ## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n) :::note From a7350299396d5ba3f2322584195554a7d946562f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:50:54 +0000 Subject: [PATCH 290/985] Fix tests --- src/Common/DateLUTImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 4087e77d588..082127e717c 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -255,7 +255,7 @@ private: static LUTIndex toLUTIndex(ExtendedDayNum d) { - return normalizeLUTIndex(static_cast(d) + daynum_offset_epoch); + return normalizeLUTIndex(static_cast(d + daynum_offset_epoch)); /// NOLINT } LUTIndex toLUTIndex(Time t) const From 802bae9661a6f22a8c42a6f88f9816e3497d9355 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:54:34 +0000 Subject: [PATCH 291/985] GCC --> clang pragmas --- base/base/coverage.cpp | 2 +- base/base/sort.h | 6 +++--- programs/client/Client.cpp | 4 ++-- programs/copier/ClusterCopierApp.cpp | 4 ++-- programs/extract-from-config/ExtractFromConfig.cpp | 4 ++-- programs/format/Format.cpp | 4 ++-- programs/local/LocalServer.cpp | 4 ++-- programs/obfuscator/Obfuscator.cpp | 4 ++-- src/Common/SSH/Wrappers.cpp | 8 ++++---- src/Functions/GatherUtils/Sources.h | 8 ++++---- 10 files changed, 24 insertions(+), 24 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 99b897c4571..d96b3ea1e9a 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -1,7 +1,7 @@ #include "coverage.h" #include -#pragma GCC diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-identifier" /// WITH_COVERAGE enables the default implementation of code coverage, diff --git a/base/base/sort.h b/base/base/sort.h index 99bf8a0830e..e46c388d185 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator; #endif -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" #include @@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last) ::partial_sort(first, middle, last, comparator()); } -#pragma GCC diagnostic pop +#pragma clang diagnostic pop template void sort(RandomIt first, RandomIt last, Compare compare) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a2bd6b6016a..1c7e57dac76 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1377,8 +1377,8 @@ void Client::readArguments( } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored 
"-Wmissing-declarations" int mainEntryClickHouseClient(int argc, char ** argv) { diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index fdf07dec61a..ed748a17a55 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -232,8 +232,8 @@ int ClusterCopierApp::main(const std::vector &) } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClusterCopier(int argc, char ** argv) { diff --git a/programs/extract-from-config/ExtractFromConfig.cpp b/programs/extract-from-config/ExtractFromConfig.cpp index 56041ee382f..61d451664e3 100644 --- a/programs/extract-from-config/ExtractFromConfig.cpp +++ b/programs/extract-from-config/ExtractFromConfig.cpp @@ -109,8 +109,8 @@ static std::vector extractFromConfig( return {configuration->getString(key)}; } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseExtractFromConfig(int argc, char ** argv) { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index a1c51565ae3..c92106e2f90 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" extern const char * auto_time_zones[]; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 68f0e52ce08..99639d5e604 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -944,8 +944,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseLocal(int argc, char ** argv) { diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 242e995e466..317d93aaf0c 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1204,8 +1204,8 @@ public: } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#clang GCC diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseObfuscator(int argc, char ** argv) try diff --git a/src/Common/SSH/Wrappers.cpp b/src/Common/SSH/Wrappers.cpp index 463338dbe3f..a9b9f758c6e 100644 --- a/src/Common/SSH/Wrappers.cpp +++ b/src/Common/SSH/Wrappers.cpp @@ -2,13 +2,13 @@ # if USE_SSH # include -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wreserved-macro-identifier" -# pragma GCC diagnostic ignored "-Wreserved-identifier" +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wreserved-macro-identifier" +# pragma clang diagnostic ignored "-Wreserved-identifier" # include -# pragma GCC diagnostic pop +# pragma clang diagnostic pop namespace { diff --git 
a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 222f9f19168..80fb9ce3900 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -140,9 +140,9 @@ struct NumericArraySource : public ArraySourceImpl> /// The methods can be virtual or not depending on the template parameter. See IStringSource. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsuggest-override" -#pragma GCC diagnostic ignored "-Wsuggest-destructor-override" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wsuggest-override" +#pragma clang diagnostic ignored "-Wsuggest-destructor-override" template struct ConstSource : public Base @@ -231,7 +231,7 @@ struct ConstSource : public Base } }; -#pragma GCC diagnostic pop +#pragma clang diagnostic pop struct StringSource { From a7568ddbfaf107e9dafa6fa83c9d5f747a0e7153 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:58:20 +0000 Subject: [PATCH 292/985] Fix spelling --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3614bcb7452..473907b45ae 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1846,6 +1846,7 @@ linearized lineasstring linefeeds lineorder +linestring linux llvm loadDefaultCAFile @@ -2204,7 +2205,9 @@ rankCorr rapidjson rawblob readWKTMultiPolygon +readWKTPoint readWKTPolygon +readWKTRing readahead readline readme From c4f29af8ec0927c09b9d12d83767607020915c30 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 11 Mar 2024 13:35:05 +0000 Subject: [PATCH 293/985] CI: fix nightly job issue #do_not_test --- tests/ci/ci.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1eec9a6771b..9d57f161be3 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1756,7 +1756,8 @@ def main() -> int: result["build"] = build_digest result["docs"] = docs_digest result["ci_flags"] = ci_flags - result["stages_data"] = _generate_ci_stage_config(jobs_data) + if not args.skip_jobs: + result["stages_data"] = _generate_ci_stage_config(jobs_data) result["jobs_data"] = jobs_data result["docker_data"] = docker_data ### CONFIGURE action: end From 19d8256fa83a4e8353dcad372067085ec8f0828d Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 11 Mar 2024 14:44:19 +0100 Subject: [PATCH 294/985] Update test.py --- tests/integration/test_backup_restore_s3/test.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index f3f4837c317..d65fc1f09d6 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -130,11 +130,11 @@ def check_system_tables(backup_query_id=None): if disk ] expected_disks = ( - ("default", "local", "None", "None"), - ("disk_s3", "object_storage", "s3", "local"), - ("disk_s3_cache", "object_storage", "s3", "local"), - ("disk_s3_other_bucket", "object_storage", "s3", "local"), - ("disk_s3_plain", "object_storage", "s3", "plain"), + ("default", "Local", "None", "None"), + ("disk_s3", "ObjectStorage", "S3", "Local"), + ("disk_s3_cache", "ObjectStorage", "S3", "Local"), + ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"), + ("disk_s3_plain", 
"ObjectStorage", "S3", "Plain"), ) assert len(expected_disks) == len(disks) for expected_disk in expected_disks: From 9bada70f45654495a30e394d94a374a862c24fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 14:52:32 +0100 Subject: [PATCH 295/985] Remove a bunch of transitive dependencies --- src/Backups/BackupCoordinationRemote.cpp | 2 ++ src/Formats/ReadSchemaUtils.cpp | 9 +++++---- src/Interpreters/DatabaseCatalog.h | 7 +++---- src/Interpreters/GraceHashJoin.cpp | 15 ++++++--------- src/Interpreters/TemporaryDataOnDisk.h | 6 +++--- src/Planner/PlannerExpressionAnalysis.cpp | 2 ++ src/Processors/QueryPlan/AggregatingStep.cpp | 1 + src/Processors/QueryPlan/CubeStep.cpp | 7 ++++--- src/Processors/QueryPlan/SortingStep.cpp | 1 + 9 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index 9c509858b2a..ec652f20069 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -14,6 +14,8 @@ #include #include +#include + namespace DB { diff --git a/src/Formats/ReadSchemaUtils.cpp b/src/Formats/ReadSchemaUtils.cpp index 5badf4301bf..b05b768899b 100644 --- a/src/Formats/ReadSchemaUtils.cpp +++ b/src/Formats/ReadSchemaUtils.cpp @@ -1,10 +1,11 @@ #include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include namespace DB { diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 4fe114cc493..6f05a3cea0f 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -1,15 +1,14 @@ #pragma once #include +#include +#include #include #include -#include #include #include -#include "Common/NamePrompter.h" +#include #include -#include "Storages/IStorage.h" -#include "Databases/IDatabase.h" #include #include diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5fb92a68a29..53d1f48c291 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -1,21 +1,18 @@ +#include +#include +#include +#include #include #include #include - -#include #include - -#include +#include #include #include #include -#include -#include - -#include - #include +#include namespace CurrentMetrics diff --git a/src/Interpreters/TemporaryDataOnDisk.h b/src/Interpreters/TemporaryDataOnDisk.h index e57d9130369..8b0649be1b1 100644 --- a/src/Interpreters/TemporaryDataOnDisk.h +++ b/src/Interpreters/TemporaryDataOnDisk.h @@ -2,11 +2,11 @@ #include -#include -#include +#include #include -#include +#include #include +#include namespace CurrentMetrics diff --git a/src/Planner/PlannerExpressionAnalysis.cpp b/src/Planner/PlannerExpressionAnalysis.cpp index 52001eb27c5..30d90a68072 100644 --- a/src/Planner/PlannerExpressionAnalysis.cpp +++ b/src/Planner/PlannerExpressionAnalysis.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include #include #include diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index f374a7b7b10..a76bacdd97b 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 0c632c346c7..d010a3327a6 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ 
-1,10 +1,11 @@ +#include +#include +#include +#include #include #include #include -#include #include -#include -#include namespace DB { diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 641b9036d4c..d0491cb4b82 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include From 91de3825171eefb8f828c2907181b6a5e6b0f017 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 11 Mar 2024 14:00:01 +0000 Subject: [PATCH 296/985] Spit `DictionaryPipelineExecutor` into separate file --- src/Dictionaries/CacheDictionary.cpp | 2 +- .../DictionaryPipelineExecutor.cpp | 42 +++++++++++++++++++ src/Dictionaries/DictionaryPipelineExecutor.h | 27 ++++++++++++ src/Dictionaries/DictionarySourceHelpers.cpp | 29 ------------- src/Dictionaries/DictionarySourceHelpers.h | 17 -------- src/Dictionaries/FlatDictionary.cpp | 2 +- src/Dictionaries/HashedArrayDictionary.cpp | 1 + src/Dictionaries/HashedDictionary.h | 2 +- src/Dictionaries/IPAddressDictionary.cpp | 1 + src/Dictionaries/PolygonDictionary.cpp | 3 +- src/Dictionaries/RangeHashedDictionary.h | 6 +-- src/Dictionaries/RegExpTreeDictionary.cpp | 1 + .../registerRangeHashedDictionary.cpp | 5 ++- 13 files changed, 82 insertions(+), 56 deletions(-) create mode 100644 src/Dictionaries/DictionaryPipelineExecutor.cpp create mode 100644 src/Dictionaries/DictionaryPipelineExecutor.h diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 8444042db9e..b136d5ebd71 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/DictionaryPipelineExecutor.cpp b/src/Dictionaries/DictionaryPipelineExecutor.cpp new file mode 100644 index 00000000000..30d1ab95f53 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.cpp @@ -0,0 +1,42 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) + : async_executor(async ? std::make_unique(pipeline_) : nullptr) + , executor(async ? 
nullptr : std::make_unique(pipeline_)) +{ +} + +bool DictionaryPipelineExecutor::pull(Block & block) +{ + if (async_executor) + { + while (true) + { + bool has_data = async_executor->pull(block); + if (has_data && !block) + continue; + return has_data; + } + } + else if (executor) + return executor->pull(block); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); +} + +DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; + +} diff --git a/src/Dictionaries/DictionaryPipelineExecutor.h b/src/Dictionaries/DictionaryPipelineExecutor.h new file mode 100644 index 00000000000..601213e5039 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +class Block; +class QueryPipeline; +class PullingAsyncPipelineExecutor; +class PullingPipelineExecutor; + +/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor +class DictionaryPipelineExecutor +{ +public: + DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); + bool pull(Block & block); + + ~DictionaryPipelineExecutor(); + +private: + std::unique_ptr async_executor; + std::unique_ptr executor; +}; + +} diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index d9a4d9ccbcf..f0e1bc4109a 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -9,15 +9,11 @@ #include #include -#include -#include - namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } @@ -135,29 +131,4 @@ String TransformWithAdditionalColumns::getName() const return "TransformWithAdditionalColumns"; } -DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) - : async_executor(async ? std::make_unique(pipeline_) : nullptr) - , executor(async ? 
nullptr : std::make_unique(pipeline_)) -{} - -bool DictionaryPipelineExecutor::pull(Block & block) -{ - if (async_executor) - { - while (true) - { - bool has_data = async_executor->pull(block); - if (has_data && !block) - continue; - return has_data; - } - } - else if (executor) - return executor->pull(block); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); -} - -DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; - } diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h index a545b5cdac7..39c6e7b3c42 100644 --- a/src/Dictionaries/DictionarySourceHelpers.h +++ b/src/Dictionaries/DictionarySourceHelpers.h @@ -16,10 +16,6 @@ namespace DB struct DictionaryStructure; class SettingsChanges; -class PullingPipelineExecutor; -class PullingAsyncPipelineExecutor; -class QueryPipeline; - /// For simple key Block blockForIds( @@ -55,17 +51,4 @@ private: size_t current_range_index = 0; }; -/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor -class DictionaryPipelineExecutor -{ -public: - DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); - bool pull(Block & block); - - ~DictionaryPipelineExecutor(); -private: - std::unique_ptr async_executor; - std::unique_ptr executor; -}; - } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index efb7d0a176c..fc58ff525bd 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d09f402143e..2420c07277c 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index b3b8cc56868..46a0af487f5 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index e1c9572e607..1bc6d16c932 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 485b48d9d81..1456a0db750 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -1,6 +1,5 @@ #include "PolygonDictionary.h" -#include #include #include @@ -15,7 +14,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1a6ee7e81d4..509b991b30c 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -29,10 +31,6 @@ #include #include -#include -#include -#include - namespace DB { diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 4d82aa9ca0e..8930074bbe0 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ 
b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 4e20abfdb79..8123b811198 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -1,5 +1,8 @@ -#include "RangeHashedDictionary.h" +#include + #include +#include +#include #include namespace DB From 8b5ccb4735365ef81af4debcc3180f296452268d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 15:53:46 +0100 Subject: [PATCH 297/985] Remove one template --- src/Interpreters/Aggregator.cpp | 65 +++++++++++++-------------------- src/Interpreters/Aggregator.h | 4 +- 2 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 80a98683867..69625dbd57d 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2609,8 +2609,9 @@ void NO_INLINE Aggregator::mergeDataNullKey( } } -template -void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const +template +void NO_INLINE +Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]]) const { if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization) mergeDataNullKey(table_dst, table_src, arena); @@ -2637,7 +2638,7 @@ void NO_INLINE Aggregator::mergeDataImpl(Table & table_dst, Table & table_src, A table_src.clearAndShrink(); #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { const auto & compiled_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; compiled_functions.merge_aggregate_states_function(dst_places.data(), src_places.data(), dst_places.size()); @@ -2787,26 +2788,16 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( if (!no_more_keys) { + bool use_compiled_functions = false; #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - else - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - } - else + use_compiled_functions = compiled_aggregate_functions_holder != nullptr; #endif - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - else - mergeDataImpl( - getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool); - } + if (prefetch) + mergeDataImpl( + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, use_compiled_functions); + else + mergeDataImpl( + getDataVariant(*res).data, getDataVariant(current).data, res->aggregates_pool, use_compiled_functions); } else if (res->without_key) { @@ -2851,26 +2842,22 @@ void NO_INLINE Aggregator::mergeBucketImpl( return; AggregatedDataVariants & current = *data[result_num]; + bool use_compiled_functions = false; #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - else - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - } - else + use_compiled_functions = 
compiled_aggregate_functions_holder != nullptr; #endif - { - if (prefetch) - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - else - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket], arena); - } + if (prefetch) + mergeDataImpl( + getDataVariant(*res).data.impls[bucket], + getDataVariant(current).data.impls[bucket], + arena, + use_compiled_functions); + else + mergeDataImpl( + getDataVariant(*res).data.impls[bucket], + getDataVariant(current).data.impls[bucket], + arena, + use_compiled_functions); } } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 375b8986101..67e82cdd784 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1429,8 +1429,8 @@ private: Arena * arena) const; /// Merge data from hash table `src` into `dst`. - template - void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena) const; + template + void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions) const; /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`. template From 724cc903afb9283a8369a62a836d04eceae42e57 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:56:02 +0100 Subject: [PATCH 298/985] Restart CI --- tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh index 1bf21dfc53b..6cd5c3b486c 100755 --- a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh +++ b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh @@ -9,4 +9,3 @@ DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.csv $CLICKHOUSE_LOCAL -q "select number > 1000000 ? 
'error' : toString(number) from numbers(2000000) format CSV" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from file($DATA_FILE, CSV, 'x UInt64') format Null settings input_format_allow_errors_ratio=1" rm $DATA_FILE - From ecc30448baa1c6283f3f0f13c83cfd1bf4428b9b Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 15:26:29 +0000 Subject: [PATCH 299/985] Fix filtering when optimize_use_implicit_projections=1 --- .../optimizeUseAggregateProjection.cpp | 4 ++ src/Storages/VirtualColumnUtils.cpp | 2 +- src/Storages/VirtualColumnUtils.h | 3 + ...ions_non_deterministoc_functions.reference | 55 +++++++++++++++++++ ...rojections_non_deterministoc_functions.sql | 28 ++++++++++ 5 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference create mode 100644 tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 91f4213ff43..b40fea47b3c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -464,6 +465,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(getLogger("optimizeUseProjections"), "Query DAG: {}", dag.dag->dumpDAG()); candidates.has_filter = dag.filter_node; + /// We can't use minmax projection if filter has non-deterministic functions. + if (dag.filter_node && !VirtualColumnUtils::isDeterministicInScopeOfQuery(dag.filter_node)) + can_use_minmax_projection = false; if (can_use_minmax_projection) { diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 897090223d6..c3ac27903c9 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -238,7 +238,7 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } -static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { for (const auto * child : node->children) { diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index b5526fc5c7f..83494872cac 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -25,6 +25,9 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, /// Just filters block. Block should contain all the required columns. void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); +/// Recursively checks if all functions used in DAG are deterministic in scope of query. +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); + /// Extract a part of predicate that can be evaluated using only columns from input_names. 
ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference new file mode 100644 index 00000000000..8233925d609 --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference @@ -0,0 +1,55 @@ +-- count +100000 all_10_10_0 +100000 all_1_1_0 +100000 all_2_2_0 +100000 all_3_3_0 +100000 all_4_4_0 +100000 all_5_5_0 +100000 all_6_6_0 +100000 all_7_7_0 +100000 all_8_8_0 +100000 all_9_9_0 +-- rand()%2=0: +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql new file mode 100644 index 00000000000..3be9bc3982a --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql @@ -0,0 +1,28 @@ +create table test (number UInt64) engine=MergeTree order by number; +system stop merges test; +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); + +select '-- count'; +SELECT count(), _part FROM test GROUP BY _part ORDER BY _part; + +select '-- rand()%2=0:'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(1)%2=1 GROUP BY _part ORDER BY _part; + +select '-- optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(2)%2=1 GROUP BY _part ORDER BY _part settings optimize_use_implicit_projections=0; + +select '-- optimize_trivial_count_query=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(3)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0; + +select '-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(4)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0,optimize_use_implicit_projections=0; + From 879f7f2f8c862aae51ddc5a8faebb8d07b5d4493 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 16:28:25 +0100 Subject: [PATCH 300/985] Remove more templates for JIT --- src/Interpreters/Aggregator.cpp | 185 ++++++++++++++------------------ src/Interpreters/Aggregator.h | 
21 ++-- 2 files changed, 96 insertions(+), 110 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 69625dbd57d..c7ce3e46446 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1111,6 +1111,7 @@ void NO_INLINE Aggregator::executeImpl( bool all_keys_are_const, AggregateDataPtr overflow_row) const { + bool use_compiled_functions = false; if (!no_more_keys) { /// Prefetching doesn't make sense for small hash tables, because they fit in caches entirely. @@ -1118,33 +1119,47 @@ void NO_INLINE Aggregator::executeImpl( && (method.data.getBufferSizeInBytes() > min_bytes_for_prefetch); #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions)) - { - if (prefetch) - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - else - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - } - else + use_compiled_functions = compiled_aggregate_functions_holder && !hasSparseArguments(aggregate_instructions); #endif - { - if (prefetch) - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - else - executeImplBatch( - method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); - } + if (prefetch) + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); + else + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); } else { - executeImplBatch(method, state, aggregates_pool, row_begin, row_end, aggregate_instructions, all_keys_are_const, overflow_row); + executeImplBatch( + method, + state, + aggregates_pool, + row_begin, + row_end, + aggregate_instructions, + all_keys_are_const, + use_compiled_functions, + overflow_row); } } -template +template void NO_INLINE Aggregator::executeImplBatch( Method & method, State & state, @@ -1153,6 +1168,7 @@ void NO_INLINE Aggregator::executeImplBatch( size_t row_end, AggregateFunctionInstruction * aggregate_instructions, bool all_keys_are_const, + bool use_compiled_functions [[maybe_unused]], AggregateDataPtr overflow_row) const { using KeyHolder = decltype(state.getKeyHolder(0, std::declval())); @@ -1284,7 +1300,7 @@ void NO_INLINE Aggregator::executeImplBatch( aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { const auto & compiled_aggregate_functions = compiled_aggregate_functions_holder->compiled_aggregate_functions; compiled_aggregate_functions.create_aggregate_states_function(aggregate_data); @@ -1293,20 +1309,6 @@ void NO_INLINE Aggregator::executeImplBatch( static constexpr bool skip_compiled_aggregate_functions = true; createAggregateStates(aggregate_data); } - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. 
- for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = aggregate_data + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif } else #endif @@ -1339,7 +1341,7 @@ void NO_INLINE Aggregator::executeImplBatch( } #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { std::vector columns_data; @@ -1372,9 +1374,8 @@ void NO_INLINE Aggregator::executeImplBatch( for (size_t i = 0; i < aggregate_functions.size(); ++i) { #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; + if (use_compiled_functions && is_aggregate_function_compiled[i]) + continue; #endif AggregateFunctionInstruction * inst = aggregate_instructions + i; @@ -1387,18 +1388,19 @@ void NO_INLINE Aggregator::executeImplBatch( } -template void NO_INLINE Aggregator::executeWithoutKeyImpl( AggregatedDataWithoutKey & res, - size_t row_begin, size_t row_end, + size_t row_begin, + size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const + Arena * arena, + bool use_compiled_functions [[maybe_unused]]) const { if (row_begin == row_end) return; #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { std::vector columns_data; @@ -1418,20 +1420,6 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), res); - -#if defined(MEMORY_SANITIZER) - - /// We compile only functions that do not allocate some data in Arena. Only store necessary state in AggregateData place. 
- for (size_t aggregate_function_index = 0; aggregate_function_index < aggregate_functions.size(); ++aggregate_function_index) - { - if (!is_aggregate_function_compiled[aggregate_function_index]) - continue; - - auto aggregate_data_with_offset = res + offsets_of_aggregate_states[aggregate_function_index]; - auto data_size = params.aggregates[aggregate_function_index].function->sizeOfData(); - __msan_unpoison(aggregate_data_with_offset, data_size); - } -#endif } #endif @@ -1439,13 +1427,10 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( for (size_t i = 0; i < aggregate_functions.size(); ++i) { AggregateFunctionInstruction * inst = aggregate_instructions + i; - #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) - if (is_aggregate_function_compiled[i]) - continue; + if (use_compiled_functions && is_aggregate_function_compiled[i]) + continue; #endif - addBatchSinglePlace(row_begin, row_end, inst, res + inst->state_offset, arena); } } @@ -1704,16 +1689,14 @@ bool Aggregator::executeOnBlock(Columns columns, if (result.type == AggregatedDataVariants::Type::without_key) { /// TODO: Enable compilation after investigation -// #if USE_EMBEDDED_COMPILER -// if (compiled_aggregate_functions_holder) -// { -// executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); -// } -// else -// #endif - { - executeWithoutKeyImpl(result.without_key, row_begin, row_end, aggregate_functions_instructions.data(), result.aggregates_pool); - } + bool use_compiled_functions = false; + executeWithoutKeyImpl( + result.without_key, + row_begin, + row_end, + aggregate_functions_instructions.data(), + result.aggregates_pool, + use_compiled_functions); } else { @@ -1965,19 +1948,13 @@ Aggregator::convertToBlockImpl(Method & method, Table & data, Arena * arena, Are ConvertToBlockRes res; + bool use_compiled_functions = false; if (final) { #if USE_EMBEDDED_COMPILER - if (compiled_aggregate_functions_holder) - { - static constexpr bool use_compiled_functions = !Method::low_cardinality_optimization; - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); - } - else + use_compiled_functions = compiled_aggregate_functions_holder != nullptr && !Method::low_cardinality_optimization; #endif - { - res = convertToBlockImplFinal(method, data, arena, aggregates_pools, rows); - } + res = convertToBlockImplFinal(method, data, arena, aggregates_pools, use_compiled_functions, rows); } else { @@ -2059,8 +2036,12 @@ inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColu } -template -Block Aggregator::insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data [[maybe_unused]]) const +Block Aggregator::insertResultsIntoColumns( + PaddedPODArray & places, + OutputBlockColumns && out_cols, + Arena * arena, + bool has_null_key_data [[maybe_unused]], + bool use_compiled_functions [[maybe_unused]]) const { std::exception_ptr exception; size_t aggregate_functions_destroy_index = 0; @@ -2068,7 +2049,7 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl try { #if USE_EMBEDDED_COMPILER - if constexpr (use_compiled_functions) + if (use_compiled_functions) { /** For JIT compiled functions we need to resize columns before pass them into compiled code. * insert_aggregates_into_columns_function function does not throw exception. 
@@ -2098,14 +2079,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl for (; aggregate_functions_destroy_index < params.aggregates_size;) { - if constexpr (use_compiled_functions) +#if USE_EMBEDDED_COMPILER + if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index]) { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } + ++aggregate_functions_destroy_index; + continue; } +#endif auto & final_aggregate_column = out_cols.final_aggregate_columns[aggregate_functions_destroy_index]; size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; @@ -2127,14 +2107,13 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl for (; aggregate_functions_destroy_index < params.aggregates_size; ++aggregate_functions_destroy_index) { - if constexpr (use_compiled_functions) +#if USE_EMBEDDED_COMPILER + if (use_compiled_functions && is_aggregate_function_compiled[aggregate_functions_destroy_index]) { - if (is_aggregate_function_compiled[aggregate_functions_destroy_index]) - { - ++aggregate_functions_destroy_index; - continue; - } + ++aggregate_functions_destroy_index; + continue; } +#endif size_t offset = offsets_of_aggregate_states[aggregate_functions_destroy_index]; aggregate_functions[aggregate_functions_destroy_index]->destroyBatch(0, places.size(), places.data(), offset); @@ -2146,9 +2125,9 @@ Block Aggregator::insertResultsIntoColumns(PaddedPODArray & pl return finalizeBlock(params, getHeader(/* final */ true), std::move(out_cols), /* final */ true, places.size()); } -template -Aggregator::ConvertToBlockRes NO_INLINE -Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const +template +Aggregator::ConvertToBlockRes NO_INLINE Aggregator::convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions [[maybe_unused]], size_t) const { /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated const size_t max_block_size = (return_single_block ? 
data.size() : std::min(params.max_block_size, data.size())) + 1; @@ -2204,7 +2183,8 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena { if (places.size() >= max_block_size) { - res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data)); + res.emplace_back( + insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions)); places.clear(); out_cols.reset(); has_null_key_data = false; @@ -2214,12 +2194,13 @@ Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena if constexpr (return_single_block) { - return insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data); + return insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions); } else { if (out_cols.has_value()) - res.emplace_back(insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data)); + res.emplace_back( + insertResultsIntoColumns(places, std::move(out_cols.value()), arena, has_null_key_data, use_compiled_functions)); return res; } } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 67e82cdd784..d7bbe5950a0 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1395,7 +1395,7 @@ private: AggregateDataPtr overflow_row) const; /// Specialization for a particular value no_more_keys. - template + template void executeImplBatch( Method & method, State & state, @@ -1404,16 +1404,17 @@ private: size_t row_end, AggregateFunctionInstruction * aggregate_instructions, bool all_keys_are_const, + bool use_compiled_functions, AggregateDataPtr overflow_row) const; /// For case when there are no keys (all aggregate into one row). 
- template void executeWithoutKeyImpl( AggregatedDataWithoutKey & res, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) const; + Arena * arena, + bool use_compiled_functions) const; template void writeToTemporaryFileImpl( @@ -1467,12 +1468,16 @@ private: MutableColumns & final_aggregate_columns, Arena * arena) const; - template - Block insertResultsIntoColumns(PaddedPODArray & places, OutputBlockColumns && out_cols, Arena * arena, bool has_null_key_data) const; + Block insertResultsIntoColumns( + PaddedPODArray & places, + OutputBlockColumns && out_cols, + Arena * arena, + bool has_null_key_data, + bool use_compiled_functions) const; - template - ConvertToBlockRes - convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t rows) const; + template + ConvertToBlockRes convertToBlockImplFinal( + Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, bool use_compiled_functions, size_t rows) const; template ConvertToBlockRes From 81b2a1f621d9bd64fde2c4e4f6a83c9b3b0c461a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 15:34:02 +0000 Subject: [PATCH 301/985] Fix build --- programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 317d93aaf0c..b2bf942af4e 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1205,7 +1205,7 @@ public: } #pragma clang diagnostic ignored "-Wunused-function" -#clang GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseObfuscator(int argc, char ** argv) try From 38f41ee311d0a36d194965e5815489a25c60e449 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 16:55:30 +0100 Subject: [PATCH 302/985] Fix integration test --- tests/integration/test_disk_types/test.py | 10 +++++----- .../test_endpoint_macro_substitution/test.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index af482b97be3..86579911b3e 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -3,10 +3,10 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV disk_types = { - "default": "local", - "disk_s3": "s3", - "disk_hdfs": "hdfs", - "disk_encrypted": "s3", + "default": "Local", + "disk_s3": "S3", + "disk_hdfs": "HDFS", + "disk_encrypted": "S3", } @@ -55,7 +55,7 @@ def test_different_types(cluster): def test_select_by_type(cluster): node = cluster.instances["node"] for name, disk_type in list(disk_types.items()): - if disk_type != "s3": + if disk_type != "S3": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index 42a8ddbda84..bec3d9de0e3 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -4,10 +4,10 @@ from helpers.test_tools import TSV from pyhdfs import HdfsClient disk_types = { - "default": "local", - "disk_s3": "s3", - "disk_hdfs": "hdfs", - "disk_encrypted": "s3", + "default": "Local", + "disk_s3": "S3", + "disk_hdfs": "HDFS", + "disk_encrypted": "S3", } @@ -63,7 +63,7 @@ 
def test_select_by_type(cluster): fs = HdfsClient(hosts=cluster.hdfs_ip) for name, disk_type in list(disk_types.items()): - if disk_type != "s3": + if disk_type != "S3": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" From 5db08292455fb0c6f47fc0344382ab7cf3508e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 17:20:53 +0100 Subject: [PATCH 303/985] Remove another template --- src/Interpreters/Aggregator.cpp | 55 ++++++++++++++------------------- src/Interpreters/Aggregator.h | 3 +- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c7ce3e46446..a9578b5540f 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2906,11 +2906,12 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData return non_empty_data; } -template +template void NO_INLINE Aggregator::mergeStreamsImplCase( Arena * aggregates_pool, State & state, Table & data, + bool no_more_keys, AggregateDataPtr overflow_row, size_t row_begin, size_t row_end, @@ -2922,36 +2923,34 @@ void NO_INLINE Aggregator::mergeStreamsImplCase( if (!arena_for_keys) arena_for_keys = aggregates_pool; - for (size_t i = row_begin; i < row_end; ++i) + if (no_more_keys) { - AggregateDataPtr aggregate_data = nullptr; - - if constexpr (!no_more_keys) + for (size_t i = row_begin; i < row_end; i++) { - auto emplace_result = state.emplaceKey(data, i, *arena_for_keys); // NOLINT - if (emplace_result.isInserted()) + auto find_result = state.findKey(data, i, *arena_for_keys); + /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. + AggregateDataPtr value = find_result.isFound() ? find_result.getMapped() : overflow_row; + places[i] = value; + } + } + else + { + for (size_t i = row_begin; i < row_end; i++) + { + auto emplace_result = state.emplaceKey(data, i, *arena_for_keys); + if (!emplace_result.isInserted()) + places[i] = emplace_result.getMapped(); + else { emplace_result.setMapped(nullptr); - aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + AggregateDataPtr aggregate_data = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); createAggregateStates(aggregate_data); emplace_result.setMapped(aggregate_data); + places[i] = aggregate_data; } - else - aggregate_data = emplace_result.getMapped(); } - else - { - auto find_result = state.findKey(data, i, *arena_for_keys); - if (find_result.isFound()) - aggregate_data = find_result.getMapped(); - } - - /// aggregate_date == nullptr means that the new key did not fit in the hash table because of no_more_keys. - - AggregateDataPtr value = aggregate_data ? 
aggregate_data : overflow_row; - places[i] = value; } for (size_t j = 0; j < params.aggregates_size; ++j) @@ -3005,22 +3004,16 @@ void NO_INLINE Aggregator::mergeStreamsImpl( if (use_cache) { typename Method::State state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); - else - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); + mergeStreamsImplCase( + aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); consecutive_keys_cache_stats.update(row_end - row_begin, state.getCacheMissesSinceLastReset()); } else { typename Method::StateNoCache state(key_columns, key_sizes, aggregation_state_cache); - - if (!no_more_keys) - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); - else - mergeStreamsImplCase(aggregates_pool, state, data, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); + mergeStreamsImplCase( + aggregates_pool, state, data, no_more_keys, overflow_row, row_begin, row_end, aggregate_columns_data, arena_for_keys); } } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index d7bbe5950a0..6c357623003 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1513,11 +1513,12 @@ private: bool final, ThreadPool * thread_pool) const; - template + template void mergeStreamsImplCase( Arena * aggregates_pool, State & state, Table & data, + bool no_more_keys, AggregateDataPtr overflow_row, size_t row_begin, size_t row_end, From 74281b0a4e3d9328d01379129ace85e03c145fa4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 16:42:45 +0000 Subject: [PATCH 304/985] Avoid some logical errors in experimantal Object type --- src/Columns/ColumnObject.cpp | 8 ++-- src/Common/ErrorCodes.cpp | 1 + src/DataTypes/ObjectUtils.cpp | 41 ++++++++++++++----- .../Serializations/SerializationObject.cpp | 6 +-- 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index ac2ee309e87..25a62440629 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -20,12 +20,12 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int DUPLICATE_COLUMN; extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int EXPERIMENTAL_FEATURE_ERROR; } namespace @@ -247,7 +247,7 @@ void ColumnObject::Subcolumn::checkTypes() const prefix_types.push_back(current_type); auto prefix_common_type = getLeastSupertype(prefix_types); if (!prefix_common_type->equals(*current_type)) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Data type {} of column at position {} cannot represent all columns from i-th prefix", current_type->getName(), i); } @@ -635,7 +635,7 @@ void ColumnObject::checkConsistency() const { if (num_rows != leaf->data.size()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject." + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Sizes of subcolumns are inconsistent in ColumnObject." 
" Subcolumn '{}' has {} rows, but expected size is {}", leaf->path.getPath(), leaf->data.size(), num_rows); } @@ -919,7 +919,7 @@ void ColumnObject::addSubcolumn(const PathInData & key, size_t new_size) void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size) { if (!key.hasNested()) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot add Nested subcolumn, because path doesn't contain Nested"); bool inserted = false; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 44463f7f437..9f2572cbfc6 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -598,6 +598,7 @@ M(714, UNEXPECTED_CLUSTER) \ M(715, CANNOT_DETECT_FORMAT) \ M(716, CANNOT_FORGET_PARTITION) \ + M(717, EXPERIMENTAL_FEATURE_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 23d29136c85..48392a614a5 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -28,9 +28,9 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; - extern const int LOGICAL_ERROR; extern const int INCOMPATIBLE_COLUMNS; extern const int NOT_IMPLEMENTED; + extern const int EXPERIMENTAL_FEATURE_ERROR; } size_t getNumberOfDimensions(const IDataType & type) @@ -92,7 +92,7 @@ ColumnPtr createArrayOfColumn(ColumnPtr column, size_t num_dimensions) Array createEmptyArrayField(size_t num_dimensions) { if (num_dimensions == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create array field with 0 dimensions"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create array field with 0 dimensions"); Array array; Array * current_array = &array; @@ -231,7 +231,7 @@ static std::pair recursivlyConvertDynamicColumnToTuple( }; } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type->getName()); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type->getName()); } void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & storage_snapshot) @@ -247,7 +247,7 @@ void convertDynamicColumnsToTuples(Block & block, const StorageSnapshotPtr & sto GetColumnsOptions options(GetColumnsOptions::AllPhysical); auto storage_column = storage_snapshot->tryGetColumn(options, column.name); if (!storage_column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Column '{}' not found in storage", column.name); auto storage_column_concrete = storage_snapshot->getColumn(options.withExtendedObjects(), column.name); @@ -315,7 +315,7 @@ static DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool che { const auto * type_tuple = typeid_cast(type.get()); if (!type_tuple) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Least common type for object can be deduced only from tuples, but {} given", type->getName()); auto [tuple_paths, tuple_types] = flattenTuple(type); @@ -427,7 +427,7 @@ static DataTypePtr getLeastCommonTypeForDynamicColumnsImpl( if (const auto * type_tuple = typeid_cast(type_in_storage.get())) return getLeastCommonTypeForTuple(*type_tuple, concrete_types, check_ambiguos_paths); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName()); + throw 
Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName()); } DataTypePtr getLeastCommonTypeForDynamicColumns( @@ -481,7 +481,7 @@ DataTypePtr createConcreteEmptyDynamicColumn(const DataTypePtr & type_in_storage return recreateTupleWithElements(*type_tuple, new_elements); } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName()); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Type {} unexpectedly has dynamic columns", type_in_storage->getName()); } bool hasDynamicSubcolumns(const ColumnsDescription & columns) @@ -613,7 +613,7 @@ DataTypePtr reduceNumberOfDimensions(DataTypePtr type, size_t dimensions_to_redu { const auto * type_array = typeid_cast(type.get()); if (!type_array) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce"); type = type_array->getNestedType(); } @@ -627,7 +627,7 @@ ColumnPtr reduceNumberOfDimensions(ColumnPtr column, size_t dimensions_to_reduce { const auto * column_array = typeid_cast(column.get()); if (!column_array) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough dimensions to reduce"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough dimensions to reduce"); column = column_array->getDataPtr(); } @@ -705,6 +705,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node) size_t num_elements = tuple_columns.size(); Columns tuple_elements_columns(num_elements); DataTypes tuple_elements_types(num_elements); + size_t last_offset = assert_cast(*offsets_columns.back()).getData().back(); /// Reduce extra array dimensions to get columns and types of Nested elements. for (size_t i = 0; i < num_elements; ++i) @@ -712,6 +713,14 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node) assert(tuple_columns[i].array_dimensions == tuple_columns[0].array_dimensions); tuple_elements_columns[i] = reduceNumberOfDimensions(tuple_columns[i].column, tuple_columns[i].array_dimensions); tuple_elements_types[i] = reduceNumberOfDimensions(tuple_columns[i].type, tuple_columns[i].array_dimensions); + if (tuple_elements_columns[i]->size() != last_offset) + throw Exception( + ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. " + "Data size: {}, last offset: {}", + node.path.getPath(), + tuple_elements_columns[i]->size(), + last_offset); } auto result_column = ColumnArray::create(ColumnTuple::create(tuple_elements_columns), offsets_columns.back()); @@ -720,6 +729,16 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node) /// Recreate result Array type and Array column. for (auto it = offsets_columns.rbegin() + 1; it != offsets_columns.rend(); ++it) { + last_offset = assert_cast((**it)).getData().back(); + if (result_column->size() != last_offset) + throw Exception( + ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, + "Cannot create a type for subcolumn {} in Object data type: offsets_column has data inconsistent with nested_column. 
" + "Data size: {}, last offset: {}", + node.path.getPath(), + result_column->size(), + last_offset); + result_column = ColumnArray::create(result_column, *it); result_type = std::make_shared(result_type); } @@ -822,7 +841,7 @@ std::pair unflattenTuple( assert(paths.size() == tuple_columns.size()); if (paths.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unflatten empty Tuple"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot unflatten empty Tuple"); /// We add all paths to the subcolumn tree and then create a type from it. /// The tree stores column, type and number of array dimensions @@ -841,7 +860,7 @@ std::pair unflattenTuple( tree.add(paths[i], [&](Node::Kind kind, bool exists) -> std::shared_ptr { if (pos >= num_parts) - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Not enough name parts for path {}. Expected at least {}, got {}", paths[i].getPath(), pos + 1, num_parts); diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index e6dc16ef5a0..1bdc29daebd 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -29,7 +29,7 @@ namespace ErrorCodes extern const int INCORRECT_DATA; extern const int CANNOT_READ_ALL_DATA; extern const int ARGUMENT_OUT_OF_BOUND; - extern const int LOGICAL_ERROR; + extern const int EXPERIMENTAL_FEATURE_ERROR; } template @@ -177,7 +177,7 @@ void SerializationObject::serializeBinaryBulkStatePrefix( auto * stream = settings.getter(settings.path); if (!stream) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Missing stream for kind of binary serialization"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Missing stream for kind of binary serialization"); auto [tuple_column, tuple_type] = unflattenObjectToTuple(column_object); @@ -288,7 +288,7 @@ void SerializationObject::serializeBinaryBulkWithMultipleStreams( if (!state_object->nested_type->equals(*tuple_type)) { - throw Exception(ErrorCodes::LOGICAL_ERROR, + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Types of internal column of Object mismatched. 
Expected: {}, Got: {}", state_object->nested_type->getName(), tuple_type->getName()); } From 260c6387555b3823e56db135f80adaa5e0c300ab Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 16:59:01 +0000 Subject: [PATCH 305/985] Replace forgotten logical error --- src/DataTypes/ObjectUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 48392a614a5..c565930f49a 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -653,7 +653,7 @@ ColumnWithTypeAndDimensions createTypeFromNode(const Node & node) auto collect_tuple_elemets = [](const auto & children) { if (children.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create type from empty Tuple or Nested node"); + throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR, "Cannot create type from empty Tuple or Nested node"); std::vector> tuple_elements; tuple_elements.reserve(children.size()); From 16e01eb93ad449c61417dcaccd570439364b0714 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 18:05:51 +0100 Subject: [PATCH 306/985] Fix style --- src/Core/Settings.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index fb456b46d89..8257b94cd9f 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_PROFILE; extern const int NO_ELEMENTS_IN_CONFIG; extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int BAD_ARGUMENTS; } IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) From f656a015385898602cb651b419b46927f99ab602 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 11 Mar 2024 16:39:13 +0000 Subject: [PATCH 307/985] CI: fix sync build issue with reuse #do_not_test --- tests/ci/ci.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 9d57f161be3..cc4d0b11eef 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -140,7 +140,7 @@ class CiCache: self.s3 = s3 self.job_digests = job_digests self.cache_s3_paths = { - job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self.job_digests[self._get_reference_job_name(job_type)]}/" + job_type: f"{self._S3_CACHE_PREFIX}/{job_type.value}-{self._get_digest_for_job_type(self.job_digests, job_type)}/" for job_type in self.JobType } self.s3_record_prefixes = { @@ -155,14 +155,23 @@ class CiCache: if not self._LOCAL_CACHE_PATH.exists(): self._LOCAL_CACHE_PATH.mkdir(parents=True, exist_ok=True) - def _get_reference_job_name(self, job_type: JobType) -> str: - res = Build.PACKAGE_RELEASE + def _get_digest_for_job_type( + self, job_digests: Dict[str, str], job_type: JobType + ) -> str: if job_type == self.JobType.DOCS: - res = JobNames.DOCS_CHECK + res = job_digests[JobNames.DOCS_CHECK] elif job_type == self.JobType.SRCS: - res = Build.PACKAGE_RELEASE + # any build type job has the same digest - pick up Build.PACKAGE_RELEASE or Build.PACKAGE_ASAN as a failover + # Build.PACKAGE_RELEASE may not exist in the list if we have reduced CI pipeline + if Build.PACKAGE_RELEASE in job_digests: + res = job_digests[Build.PACKAGE_RELEASE] + elif Build.PACKAGE_ASAN in job_digests: + # failover, if failover does not work - fix it! + res = job_digests[Build.PACKAGE_ASAN] + else: + assert False, "BUG, no build job in digest' list" else: - assert False + assert False, "BUG, New JobType? 
- please update func" return res def _get_record_file_name( From f973e405eeb4f28a6a937c26d19cad54acd00eb4 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 7 Feb 2024 19:36:28 +0000 Subject: [PATCH 308/985] CI: fixing ARM integration tests #do_not_test --- tests/ci/ci.py | 41 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 9d57f161be3..898d23be843 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1183,13 +1183,13 @@ def _configure_jobs( if batches_to_do: jobs_to_do.append(job) + jobs_params[job] = { + "batches": batches_to_do, + "num_batches": num_batches, + } elif add_to_skip: # treat job as being skipped only if it's controlled by digest jobs_to_skip.append(job) - jobs_params[job] = { - "batches": batches_to_do, - "num_batches": num_batches, - } if not pr_info.is_release_branch(): # randomization bucket filtering (pick one random job from each bucket, for jobs with configured random_bucket property) @@ -1268,6 +1268,33 @@ def _configure_jobs( jobs_to_do = list( set(job for job in jobs_to_do_requested if job not in jobs_to_skip) ) + # if requested job does not have params in jobs_params (it happens for "run_by_label" job) + # we need to add params - otherwise it won't run as "batches" list will be empty + for job in jobs_to_do: + if job not in jobs_params: + num_batches = CI_CONFIG.get_job_config(job).num_batches + jobs_params[job] = { + "batches": list(range(num_batches)), + "num_batches": num_batches, + } + + requested_batches = set() + for token in commit_tokens: + if token.startswith("batch_"): + try: + batches = [ + int(batch) for batch in token.removeprefix("batch_").split("_") + ] + except Exception: + print(f"ERROR: failed to parse commit tag [{token}]") + requested_batches.update(batches) + if requested_batches: + print( + f"NOTE: Only specific job batches were requested [{list(requested_batches)}]" + ) + for job, params in jobs_params.items(): + if params["num_batches"] > 1: + params["batches"] = list(requested_batches) return { "digests": digests, @@ -1372,7 +1399,11 @@ def _update_gh_statuses_action(indata: Dict, s3: S3Helper) -> None: def _fetch_commit_tokens(message: str) -> List[str]: pattern = r"#[\w-]+" matches = [match[1:] for match in re.findall(pattern, message)] - res = [match for match in matches if match in Labels or match.startswith("job_")] + res = [ + match + for match in matches + if match in Labels or match.startswith("job_") or match.startswith("batch_") + ] return res From 4b94bcd54ebd8713db8aefc453edaf15a1aa55ab Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 11 Mar 2024 18:41:17 +0100 Subject: [PATCH 309/985] Update ReadSettings.h --- src/IO/ReadSettings.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index fb8cbaf4a98..c0a63bf51b1 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -99,7 +99,6 @@ struct ReadSettings bool enable_filesystem_cache = true; bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; - bool force_read_through_cache_merges = false; size_t filesystem_cache_segments_batch_size = 20; bool use_page_cache_for_disks_without_file_cache = false; From fd68fed0470214d5c8734b0c89fbb779cf9b821f Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 11 Mar 2024 19:21:35 +0100 Subject: [PATCH 310/985] fix flaky 02949_ttl_group_by_bug --- 
tests/queries/0_stateless/02949_ttl_group_by_bug.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql index 2888f6e7d66..a3d0794c897 100644 --- a/tests/queries/0_stateless/02949_ttl_group_by_bug.sql +++ b/tests/queries/0_stateless/02949_ttl_group_by_bug.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS ttl_group_by_bug; CREATE TABLE ttl_group_by_bug (key UInt32, ts DateTime, value UInt32, min_value UInt32 default value, max_value UInt32 default value) -ENGINE = MergeTree() PARTITION BY toYYYYMM(ts) +ENGINE = MergeTree() ORDER BY (key, toStartOfInterval(ts, toIntervalMinute(3)), ts) TTL ts + INTERVAL 5 MINUTE GROUP BY key, toStartOfInterval(ts, toIntervalMinute(3)) SET value = sum(value), min_value = min(min_value), max_value = max(max_value), ts=min(toStartOfInterval(ts, toIntervalMinute(3))); From a1e5161cee50650a5c4e87ca60e7ed9eb61451b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 19:25:34 +0100 Subject: [PATCH 311/985] Disable sanitizers with 02784_parallel_replicas_automatic_decision_join --- .../02784_parallel_replicas_automatic_decision_join.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh index ef3e6000903..801cd22b79f 100755 --- a/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh +++ b/tests/queries/0_stateless/02784_parallel_replicas_automatic_decision_join.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -## Note: The analyzer doesn't support JOIN with parallel replicas yet +# Tags: no-tsan, no-asan, no-msan +# It's not clear why distributed aggregation is much slower with sanitizers (https://github.com/ClickHouse/ClickHouse/issues/60625) CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 55a82047613c607dedb592fed019d04455e8c8e8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:43:30 +0100 Subject: [PATCH 312/985] Fix test --- .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 9a6f4e7944a..48e98798c51 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1,2 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_GET} +select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} From 5a71636411cb358c94e58b7caac18c22104b0e1c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 19:44:52 +0100 Subject: [PATCH 313/985] Fxi --- tests/integration/test_disk_types/test.py | 30 +++++++++++++++++++ .../test_endpoint_macro_substitution/test.py | 9 +++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 86579911b3e..5047cdc605e 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -50,6 +50,36 @@ def test_different_types(cluster): assert ( fields[encrypted_col_ix] == "0" ), f"{fields[name_col_ix]} expected to be non-encrypted!" 
+def test_different_types(cluster): + node = cluster.instances["node"] + response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames")) + + assert len(response) > len(disk_types) # at least one extra line for header + + name_col_ix = response[0].index("name") + type_col_ix = response[0].index("type") + encrypted_col_ix = response[0].index("is_encrypted") + + for fields in response[1:]: # skip header + assert len(fields) >= 7 + expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN") + + if expected_disk_type != "Local": + disk_type = fields[response[0].index("object_storage_type")] + else: + disk_type = fields[type_col_ix] + + assert ( + expected_disk_type == disk_type + ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" + if "encrypted" in fields[name_col_ix]: + assert ( + fields[encrypted_col_ix] == "1" + ), f"{fields[name_col_ix]} expected to be encrypted!" + else: + assert ( + fields[encrypted_col_ix] == "0" + ), f"{fields[name_col_ix]} expected to be non-encrypted!" def test_select_by_type(cluster): diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index bec3d9de0e3..e161d8e82ff 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -45,8 +45,15 @@ def test_different_types(cluster): for fields in response[1:]: # skip header assert len(fields) >= 7 + expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN") + + if expected_disk_type != "Local": + disk_type = fields[response[0].index("object_storage_type")] + else: + disk_type = fields[type_col_ix] + assert ( - disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix] + expected_disk_type == disk_type ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" if "encrypted" in fields[name_col_ix]: assert ( From b7b83085d1722b192d802ffc3677ea6d0f03c85a Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 11 Mar 2024 18:49:45 +0000 Subject: [PATCH 314/985] CI: ci.py hot style fix #do_not_test --- tests/ci/ci.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index a699642691b..29906e6571f 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -55,6 +55,8 @@ from report import ERROR, SUCCESS, BuildResult, JobReport from s3_helper import S3Helper from version_helper import get_version_from_repo +# pylint: disable=too-many-lines + @dataclass class PendingState: From 5a400c181b818cf3bbf371eb388a937ee816a66c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 11 Mar 2024 09:49:25 +0100 Subject: [PATCH 315/985] Fix llvm symbolizer on CI In #61011 the whole toolchain installation had been removed from the base image to reduce image sizes, and this is a good thing indeed. 
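As background (an illustrative sketch, not something this patch changes): the
ASan/TSan/MSan runtimes symbolize stack traces by invoking an external
llvm-symbolizer binary, which they locate either on PATH or via the
external_symbolizer_path sanitizer option, roughly like this:

    # illustrative only: point the sanitizer runtimes at the symbolizer binary
    export ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer
    export TSAN_OPTIONS="external_symbolizer_path=/usr/bin/llvm-symbolizer"
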
However it also breaks the symbolizer for sanitizers, which makes stacktraces unreadable, so let's fix this by getting back llvm package, this should be OK, since it's size is not gigabytes, but only 48MiB (at least for llvm-14): # dpkg -L llvm-14| xargs file | grep -v directory | cut -d: -f1 | xargs du -sch | grep total 48M total Signed-off-by: Azat Khuzhin --- docker/test/base/Dockerfile | 3 +++ docker/test/fasttest/Dockerfile | 3 --- docker/test/util/Dockerfile | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 55229e893de..2317f84e0cb 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +# for external_symbolizer_path +RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer + RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 ENV LC_ALL en_US.UTF-8 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 62cdcc3f830..912ff191e57 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -14,7 +14,6 @@ RUN apt-get update \ libclang-${LLVM_VERSION}-dev \ libclang-rt-${LLVM_VERSION}-dev \ lld-${LLVM_VERSION} \ - llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ lsof \ ninja-build \ @@ -37,8 +36,6 @@ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3 # This symlink is required by gcc to find the lld linker RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld -# for external_symbolizer_path -RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer # FIXME: workaround for "The imported target "merge-fdata" references the file" error # https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 4f2dc9df849..5446adf3793 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -26,6 +26,8 @@ RUN apt-get update \ && export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \ && echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \ /etc/apt/sources.list \ + && apt-get update \ + && apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* From 9b055c3a43039387b42e755efddd83b9a8934ca6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 20:38:30 +0100 Subject: [PATCH 316/985] Use assert_cast to prevent nullptr dereference on bad column types in FunctionsConversion --- src/Functions/FunctionsConversion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 1522e76893e..f338af28240 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -4561,7 +4561,7 @@ arguments, result_type, input_rows_count); \ if (from_low_cardinality) { - const auto * col_low_cardinality = typeid_cast(arguments[0].column.get()); + const auto * col_low_cardinality 
= assert_cast(arguments[0].column.get()); if (skip_not_null_check && col_low_cardinality->containsNull()) throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); @@ -4586,7 +4586,7 @@ arguments, result_type, input_rows_count); \ if (to_low_cardinality) { auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = typeid_cast(res_column.get()); + auto * col_low_cardinality = assert_cast(res_column.get()); if (from_low_cardinality && !src_converted_to_full_column) { From 3a26b9c89ee3083884fde341c2af418bcde2f4cf Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 11 Mar 2024 19:42:25 +0000 Subject: [PATCH 317/985] impl --- .../0_stateless/02887_mutations_subcolumns.reference | 6 +++--- tests/queries/0_stateless/02887_mutations_subcolumns.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.reference b/tests/queries/0_stateless/02887_mutations_subcolumns.reference index c2d6cbbd225..1ccc83b48a3 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.reference +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.reference @@ -5,6 +5,6 @@ 4 ttt 5 ttt 6 ttt -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} -{"a":"3","obj":{"k1":null,"k2":null,"k3":1}} -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} +1 [('k1',1)] +3 [('k3',1)] +1 [('k1',1)] diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.sql b/tests/queries/0_stateless/02887_mutations_subcolumns.sql index a01158e1b06..87b3009e929 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.sql +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.sql @@ -40,9 +40,9 @@ INSERT INTO t_mutations_subcolumns VALUES (2, '{"k2": 1}'); INSERT INTO t_mutations_subcolumns VALUES (3, '{"k3": 1}'); ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k2 = 1; -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; ALTER TABLE t_mutations_subcolumns DELETE WHERE isNull(obj.k1); -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; DROP TABLE t_mutations_subcolumns; From 2e74685ba6ea8a3cc32ff0e21d0ee657517ef5a4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 19:58:43 +0000 Subject: [PATCH 318/985] Make variant tests a bit faster --- .../02941_variant_type_2.reference | 80 +++++++++---------- .../0_stateless/02941_variant_type_2.sh | 12 +-- ...different_local_and_global_order.reference | 30 +++---- ...e_with_different_local_and_global_order.sh | 8 +- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 4b6d53c52ac..20a5176cb5e 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -1,51 +1,51 @@ Memory test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree compact test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 
-200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree wide test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 509c537e7fc..d1fa0a777c9 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -12,12 +12,12 @@ CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspic function test4_insert() { echo "test4 insert" - $CH_CLIENT -nmq "insert into test select number, NULL from numbers(200000); -insert into test select number + 200000, number from numbers(200000); -insert into test select number + 400000, 'str_' || toString(number) from numbers(200000); -insert into test select number + 600000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(200000); -insert into test select number + 800000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(200000); -insert into test select number + 1000000, range(number % 20 + 1)::Array(UInt64) from numbers(200000);" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(100000); +insert into test select number + 100000, number from numbers(100000); +insert into test select number + 200000, 'str_' || toString(number) from numbers(100000); +insert into test select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); +insert into test select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(100000); +insert into test select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" } function test4_select diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference index 1736a307c42..4109a88997c 100644 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference @@ -44,9 +44,9 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test1 insert @@ -136,14 +136,14 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test1 insert @@ -233,12 +233,12 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 
----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh index 3bb37719a3f..1d88757a5d6 100755 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -29,10 +29,10 @@ function test1_select() function test2_insert() { echo "test2 insert" - $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(1000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(2000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(3000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(200000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(400000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(600000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" } function test2_select() From a90a6e9a271515dec58e4d4f716bcd591f245c00 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 21:05:44 +0100 Subject: [PATCH 319/985] Revert "Don't allow to set max_parallel_replicas to 0 as it doesn't make sense" --- src/Client/ConnectionPoolWithFailover.cpp | 10 ---------- src/Client/HedgedConnectionsFactory.cpp | 6 +----- src/Client/HedgedConnectionsFactory.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 4 ++-- .../03001_max_parallel_replicas_zero_value.reference | 
0 .../03001_max_parallel_replicas_zero_value.sql | 5 ----- 7 files changed, 5 insertions(+), 24 deletions(-) delete mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference delete mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index ad8ed0067d8..492fd4ae9e2 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,7 +21,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; - extern const int BAD_ARGUMENTS; } @@ -192,20 +191,11 @@ std::vector ConnectionPoolWithFailover::g max_entries = nested_pools.size(); } else if (pool_mode == PoolMode::GET_ONE) - { max_entries = 1; - } else if (pool_mode == PoolMode::GET_MANY) - { - if (settings.max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = settings.max_parallel_replicas; - } else - { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); - } if (!priority_func) priority_func = makeGetPriorityFunc(settings); diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 703cc1f8821..f5b074a0257 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,7 +19,6 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( @@ -83,10 +82,7 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - if (max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - - max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); + max_entries = max_parallel_replicas; break; } } diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index dd600d58e1e..ce7b553acdd 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -158,7 +158,7 @@ private: /// checking the number of requested replicas that are still in process). 
size_t requested_connections_count = 0; - const size_t max_parallel_replicas = 1; + const size_t max_parallel_replicas = 0; const bool skip_unavailable_shards = 0; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e28d8366aa7..bcedba7346d 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -947,7 +947,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() if (number_of_replicas_to_use <= 1) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{1}); + context->setSetting("max_parallel_replicas", UInt64{0}); LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read"); return true; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 0fe943e0bc7..7b3fb0c5c91 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -295,7 +295,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - query_context->setSetting("max_parallel_replicas", UInt64{1}); + query_context->setSetting("max_parallel_replicas", UInt64{0}); LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -756,7 +756,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql deleted file mode 100644 index 611aa4777ba..00000000000 --- a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql +++ /dev/null @@ -1,5 +0,0 @@ -drop table if exists test_d; -create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); -select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} -drop table test_d; - From 120a1fdb5f817b442bf659da243407fb7003eaa1 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:24:33 -0300 Subject: [PATCH 320/985] Improves varPop docs. Adds varPopStable. 
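A minimal usage sketch of the two functions documented below (the table name and
values are illustrative only, not part of the docs): varPop(x) returns the
population variance of a single column, Σ((x - x̅)^2) / n, while varPopStable(x)
computes the same quantity with a slower but numerically stable algorithm.

```sql
-- Illustrative only: compare the fast and the numerically stable variants.
CREATE TABLE series (x Float64) ENGINE = Memory;
INSERT INTO series VALUES (10.5), (12.3), (9.8), (11.2), (10.7);
SELECT varPop(x), varPopStable(x) FROM series;
```
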
--- .../aggregate-functions/reference/varpop.md | 99 +++++++++++++++++-- 1 file changed, 91 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 751688b0830..5f18bdc30f6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,16 +1,99 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/varpop +title: "varPop" +slug: "/en/sql-reference/aggregate-functions/reference/varpop" sidebar_position: 32 --- -# varPop(x) +This page covers the `varPop` and `varPopStable` functions available in ClickHouse. -Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +## varPop -In other words, dispersion for a set of values. Returns `Float64`. +Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. -Alias: `VAR_POP`. +**Syntax** -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +```sql +covarPop(x, y) +``` + +**Parameters** + +- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) +- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) + +**Returned value** + +Returns an integer of type `Float64`. + +**Implementation details** + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). + +**Example** + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8); + +SELECT + covarPop(x, y) AS covar_pop +FROM test_data; +``` + +```response +3 +``` + +## varPopStable + +Calculates population covariance between two data columns using a stable, numerically accurate method to calculate the variance. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations. + +**Syntax** + +```sql +covarPopStable(x, y) +``` + +**Parameters** + +- `x`: The first data column. [String literal](../syntax#syntax-string-literal) +- `y`: The second data column. [Expression](../syntax#syntax-expressions) + +**Returned value** + +Returns an integer of type `Float64`. + +**Implementation details** + +Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. 
+ +**Example** + +Query: + +```sql +DROP TABLE IF EXISTS test_data; +CREATE TABLE test_data +( + x Int32, + y Int32 +) +ENGINE = Memory; + +INSERT INTO test_data VALUES (1, 2), (2, 9), (9, 5), (4, 6), (5, 8); + +SELECT + covarPopStable(x, y) AS covar_pop_stable +FROM test_data; +``` + +```response +0.5999999999999999 +``` From 281dc8d29deba2980e6b191edefa3b62114d38a7 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:48:12 -0300 Subject: [PATCH 321/985] Improves varSamp docs. Adds varSampStable docs. --- .../aggregate-functions/reference/varsamp.md | 126 ++++++++++++++++-- 1 file changed, 118 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 9b2b94936ec..e75cb075ff8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -1,18 +1,128 @@ --- +title: "varSamp" slug: /en/sql-reference/aggregate-functions/reference/varsamp sidebar_position: 33 --- -# varSamp +This page contains information on the `varSamp` and `varSampStable` ClickHouse functions. -Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`. +## varSamp -It represents an unbiased estimate of the variance of a random variable if passed values from its sample. +Calculate the sample variance of a data set. -Returns `Float64`. When `n <= 1`, returns `+∞`. +**Syntax** -Alias: `VAR_SAMP`. +```sql +varSamp(expr) +``` -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. -::: +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) + +**Returned value** + +Returns a Float64 value representing the sample variance of the input data set. + +**Implementation details** + +The `varSamp()` function calculates the sample variance using the following formula: + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: + +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). + +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSamp(value) FROM example_table; +``` + +Response: + +```response +0.8650000000000091 +``` + +## varSampStable + +Calculate the sample variance of a data set using a numerically stable algorithm. 
+ +**Syntax** + +```sql +varSampStable(expr) +``` + +**Parameters** + +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) + +**Returned value** + +The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. + +**Implementation details** + +The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function): + +```plaintext +∑(x - mean(x))^2 / (n - 1) +``` + +Where: +- `x` is each individual data point in the data set. +- `mean(x)` is the arithmetic mean of the data set. +- `n` is the number of data points in the data set. + +The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. + +Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. + +**Example** + +Query: + +```sql +CREATE TABLE example_table +( + id UInt64, + value Float64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7); + +SELECT varSampStable(value) FROM example_table; +``` + +Response: + +```response +0.865 +``` + +This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. 
From 563df9bdcb425810a0c2d3ecb11302e22039c048 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 22:49:18 +0100 Subject: [PATCH 322/985] Fix multiple bugs in groupArraySorted --- .../AggregateFunctionGroupArraySorted.cpp | 8 +++++--- .../0_stateless/03008_groupSortedArray_field.reference | 3 +++ .../queries/0_stateless/03008_groupSortedArray_field.sql | 6 ++++++ 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03008_groupSortedArray_field.reference create mode 100644 tests/queries/0_stateless/03008_groupSortedArray_field.sql diff --git a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp index 0e9856cfab9..0692ff28f18 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp @@ -310,10 +310,12 @@ public: { for (Field & element : values) { - UInt8 is_null = 0; - readBinary(is_null, buf); - if (!is_null) + bool has_value = 0; + readBinary(has_value, buf); + if (has_value) serialization->deserializeBinary(element, buf, {}); + else + element = Field{}; } } else diff --git a/tests/queries/0_stateless/03008_groupSortedArray_field.reference b/tests/queries/0_stateless/03008_groupSortedArray_field.reference new file mode 100644 index 00000000000..a7f89ebcf58 --- /dev/null +++ b/tests/queries/0_stateless/03008_groupSortedArray_field.reference @@ -0,0 +1,3 @@ +0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA +070109000000010600000001080000000103000000010500000001040000000107000000 AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0))) +[3,4,5,6,7,8,9] diff --git a/tests/queries/0_stateless/03008_groupSortedArray_field.sql b/tests/queries/0_stateless/03008_groupSortedArray_field.sql new file mode 100644 index 00000000000..6d2aea641a5 --- /dev/null +++ b/tests/queries/0_stateless/03008_groupSortedArray_field.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61186 +SELECT hex(CAST(unhex('0A01003C79A557B3C43400C4865AA84C3B4B01000650BC18F7DE0B00FAAF43E708213401008ED706EA0A9F13007228F915F5602C0100C692CA8FB81405003A6D357047EB1A01008416B7C3239EE3FF7BE9483CDC61DC01003E133A7C081AF5FFC1ECC583F7E5EA01000000000000000000000000000000000100C4865AA84C3BCBFF3B79A557B3C4B4010024C46EF500F1ECFFDB3B910AFF0ED301005E2FC14EBAEAE5FFA1D03EB14515DA'), + 'AggregateFunction(groupArraySorted(10), Decimal(38, 38))')); + +Select hex(groupArraySortedState(10)((number < 3 ? 
NULL : number)::Nullable(Decimal(3))) as t), toTypeName(t) from numbers(10); +Select finalizeAggregation(unhex('070109000000010600000001080000000103000000010500000001040000000107000000')::AggregateFunction(groupArraySorted(10), Nullable(Decimal(3, 0)))); From 1b04cc0b4da6d32fd4741ea953dfed060f846d0b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Mar 2024 03:56:10 +0100 Subject: [PATCH 323/985] Fix strange log message --- src/Loggers/Loggers.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 1d17585cc96..cc6e4691737 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -304,6 +304,9 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log log_settings.turn_off_logger = DB::TextLog::shouldTurnOffLogger(); + log_settings.database = config.getString("text_log.database", "system"); + log_settings.table = config.getString("text_log.table", "text_log"); + split->addTextLog(DB::TextLog::getLogQueue(log_settings), text_log_level); } #endif From 29fce4143d1f177efdf1864d41429cfadea22ff1 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 12 Mar 2024 12:07:24 +0800 Subject: [PATCH 324/985] [fix] log level from fatal->error when hardlink and copy both fail --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c76ffeee874..97968f1b9c1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7174,7 +7174,7 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - LOG_FATAL(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail"); + LOG_ERROR(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail"); } From c628eaca8ba19584fe36067dee8e6ec3e8f5cc4b Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Tue, 26 Dec 2023 14:13:07 +0800 Subject: [PATCH 325/985] Consider deleted rows when selecting parts to merge --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 90 +++++++++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 13 +++ .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 1 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 11 ++- .../MergeTree/MergeTreeDataMergerMutator.h | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 1 + src/Storages/MergeTree/MergeTreeSettings.h | 2 + .../MergeTree/MergedBlockOutputStream.cpp | 5 ++ .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 54 +++++++++++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 5 +- src/Storages/StorageMergeTree.cpp | 4 +- .../03001_consider_lwd_when_merge.reference | 3 + .../03001_consider_lwd_when_merge.sql | 23 +++++ 15 files changed, 208 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03001_consider_lwd_when_merge.reference create mode 100644 tests/queries/0_stateless/03001_consider_lwd_when_merge.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3fea6d04944..c099512d636 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -609,6 +609,15 @@ UInt64 IMergeTreeDataPart::getMarksCount() const return index_granularity.getMarksCount(); } +UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const +{ + if (storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge && 
supportLightweightDeleteMutate() && hasLightweightDelete() + && existing_rows_count.has_value() && existing_rows_count.value() < rows_count && rows_count > 0) + return bytes_on_disk * existing_rows_count.value() / rows_count; + else + return bytes_on_disk; +} + size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const { auto checksum = checksums.files.find(file_name); @@ -691,6 +700,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks calculateColumnsAndSecondaryIndicesSizesOnDisk(); loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. + loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`. loadPartitionAndMinMaxIndex(); if (!parent_part) { @@ -1313,6 +1323,86 @@ void IMergeTreeDataPart::loadRowsCount() } } +void IMergeTreeDataPart::loadExistingRowsCount() +{ + if (existing_rows_count.has_value()) + return; + + if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate() + || !hasLightweightDelete()) + existing_rows_count = rows_count; + else + existing_rows_count = readExistingRowsCount(); +} + +UInt64 IMergeTreeDataPart::readExistingRowsCount() +{ + const size_t total_mark = getMarksCount(); + if (!total_mark) + return rows_count; + + NamesAndTypesList cols; + cols.push_back(LightweightDeleteDescription::FILTER_COLUMN); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, total_mark)}, + nullptr, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + if (!reader) + { + LOG_WARNING(storage.log, "Create reader failed while reading existing rows count"); + return rows_count; + } + + size_t current_mark = 0; + bool continue_reading = false; + size_t current_row = 0; + size_t existing_count = 0; + + while (current_row < rows_count) + { + size_t rows_to_read = index_granularity.getMarkRows(current_mark); + continue_reading = (current_mark != 0); + + Columns result; + result.resize(1); + + size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result); + if (!rows_read) + { + LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name); + return rows_count; + } + + current_row += rows_read; + current_mark += (rows_to_read == rows_read); + + const ColumnUInt8 * row_exists_col = typeid_cast(result[0].get()); + if (!row_exists_col) + { + LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name); + return rows_count; + } + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + } + + LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_count); + return existing_count; +} + void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files) { files.push_back("count.txt"); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index aaae64a5970..8bd32e777bc 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -231,6 +231,9 @@ public: size_t rows_count = 0; + /// Existing rows count (excluding lightweight deleted rows) + 
std::optional existing_rows_count; + time_t modification_time = 0; /// When the part is removed from the working set. Changes once. mutable std::atomic remove_time { std::numeric_limits::max() }; @@ -373,6 +376,10 @@ public: void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; } + /// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true + /// Otherwise returns bytes_on_disk + UInt64 getExistingBytesOnDisk() const; + size_t getFileSizeOrZero(const String & file_name) const; auto getFilesChecksums() const { return checksums.files; } @@ -499,6 +506,9 @@ public: /// True if here is lightweight deleted mask file in part. bool hasLightweightDelete() const; + /// Read existing rows count from _row_exists column + UInt64 readExistingRowsCount(); + void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); /// Checks the consistency of this data part. @@ -664,6 +674,9 @@ private: /// For the older format version calculates rows count from the size of a column with a fixed size. void loadRowsCount(); + /// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true. + void loadExistingRowsCount(); + static void appendFilesOfRowsCount(Strings & files); /// Loads ttl infos in json format from file ttl.txt. If file doesn't exists assigns ttl infos with all zeros diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index ae6e398026d..5ef004ec019 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -174,7 +174,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() } /// Start to make the main work - size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); + size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true); /// Can throw an exception while reserving space. 
IMergeTreeDataPart::TTLInfos ttl_infos; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d56cf761cf4..5e05f75c1c5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8261,6 +8261,7 @@ std::pair MergeTreeData::createE new_data_part->setColumns(columns, {}, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = partition; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1bf1d4a3c29..90144a8cc8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo } IMergeSelector::Part part_info; - part_info.size = part->getBytesOnDisk(); + part_info.size = part->getExistingBytesOnDisk(); part_info.age = res.current_time - part->modification_time; part_info.level = part->info.level; part_info.data = ∂ @@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti return SelectPartsDecision::CANNOT_SELECT; } - sum_bytes += (*it)->getBytesOnDisk(); + sum_bytes += (*it)->getExistingBytesOnDisk(); prev_it = it; ++it; @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,10 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - res += part->getBytesOnDisk(); + if (is_merge) + res += part->getExistingBytesOnDisk(); + else + res += part->getBytesOnDisk(); } return static_cast(res * DISK_USAGE_COEFFICIENT_TO_RESERVE); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index f3a3f51b6c3..731c5e1d176 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge); private: /** Select all parts belonging to the same partition. 
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index fdac16ae19a..2ba74e44b40 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -537,6 +537,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( new_data_part->setColumns(columns, infos, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = std::move(partition); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 925dc973dc3..ea54f61b4b6 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -42,6 +42,7 @@ struct Settings; M(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ + M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ @@ -79,6 +80,7 @@ struct Settings; M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. 
Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f2fe2e0f255..d8555d69788 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -188,6 +188,11 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index_granularity = writer->getIndexGranularity(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); + /// In mutation, existing_rows_count is already calculated in PartMergerWriter + /// In merge situation, lightweight deleted rows was physically deleted, existing_rows_count equals rows_count + if (!new_part->existing_rows_count.has_value()) + new_part->existing_rows_count = rows_count; + if (default_codec != nullptr) new_part->default_codec = default_codec; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index a9ff687fe4d..620b0e34c6a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } /// TODO - some better heuristic? - size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}); + size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false); if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) && estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 150cc27c369..3d31d2f05db 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -60,6 +60,26 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static UInt64 getExistingRowsCount(const Block & block) +{ + auto column = block.getByName(LightweightDeleteDescription::FILTER_COLUMN.name).column; + const ColumnUInt8 * row_exists_col = typeid_cast(column.get()); + + if (!row_exists_col) + { + LOG_WARNING(&Poco::Logger::get("MutationHelpers::getExistingRowsCount"), "_row_exists column type is not UInt8"); + return block.rows(); + } + + UInt64 existing_count = 0; + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + + return existing_count; +} + /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. @@ -997,6 +1017,9 @@ struct MutationContext bool need_prefix = true; scope_guard temporary_directory_lock; + + /// Whether this mutation contains lightweight delete + bool has_lightweight_delete; }; using MutationContextPtr = std::shared_ptr; @@ -1191,6 +1214,7 @@ public: } case State::SUCCESS: { + finalize(); return false; } } @@ -1226,6 +1250,11 @@ private: const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; + + /// Existing rows count calculated during part writing. 
+ /// It is initialized in prepare(), calculated in mutateOriginalPartAndPrepareProjections() + /// and set to new_data_part in finalize() + size_t existing_rows_count; }; @@ -1238,6 +1267,8 @@ void PartMergerWriter::prepare() // We split the materialization into multiple stages similar to the process of INSERT SELECT query. projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } + + existing_rows_count = 0; } @@ -1251,6 +1282,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ctx->out->write(cur_block); + if (ctx->has_lightweight_delete) + existing_rows_count += MutationHelpers::getExistingRowsCount(cur_block); + for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; @@ -1340,6 +1374,12 @@ bool PartMergerWriter::iterateThroughAllProjections() return true; } +void PartMergerWriter::finalize() +{ + if (ctx->has_lightweight_delete) + ctx->new_data_part->existing_rows_count = existing_rows_count; +} + class MutateAllPartColumnsTask : public IExecutableTask { public: @@ -2185,6 +2225,20 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); + if (ctx->updated_header.has(LightweightDeleteDescription::FILTER_COLUMN.name)) + { + /// This mutation contains lightweight delete, reset existing_rows_count of new data part to 0 + /// It will be updated while writing _row_exists column + ctx->has_lightweight_delete = true; + } + else + { + ctx->has_lightweight_delete = false; + + /// This mutation does not contains lightweight delete, copy existing_rows_count from source part + ctx->new_data_part->existing_rows_count = ctx->source_part->existing_rows_count.value_or(ctx->source_part->rows_count); + } + /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 858eae4afd9..42f564f40da 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1350,7 +1350,10 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) { - sum_parts_size_in_bytes += part->getBytesOnDisk(); + if (entry.type == LogEntry::MERGE_PARTS) + sum_parts_size_in_bytes += part->getExistingBytesOnDisk(); + else + sum_parts_size_in_bytes += part->getBytesOnDisk(); if (entry.type == LogEntry::MUTATE_PART && !storage.mutation_backoff_policy.partCanBeMutated(part->name)) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 663e7f435b7..c816a6f0dce 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1113,7 +1113,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (isTTLMergeType(future_part->merge_type)) getContext()->getMergeList().bookMergeWithTTL(); - merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false); + merging_tagger 
= std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false); return std::make_shared(future_part, std::move(merging_tagger), std::make_shared()); } @@ -1336,7 +1336,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->name = part->getNewName(new_part_info); future_part->part_format = part->getFormat(); - tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); + tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); return std::make_shared(future_part, std::move(tagger), commands, txn); } } diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql new file mode 100644 index 00000000000..a65e8877020 --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS lwd_merge; + +CREATE TABLE lwd_merge (id UInt64 CODEC(NONE)) + ENGINE = MergeTree ORDER BY id +SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0; + +INSERT INTO lwd_merge SELECT number FROM numbers(10000); +INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DELETE FROM lwd_merge WHERE id % 10 > 0; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DROP TABLE IF EXISTS lwd_merge; From 4ad8141a162b3b7735e2f08c069e98b9c2ba2382 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Wed, 28 Feb 2024 19:54:21 -0600 Subject: [PATCH 326/985] Maintain compatibility of estimateNeededDiskSpace() Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeDataMergerMutator.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 90144a8cc8f..53d49b51e8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,7 @@ size_t 
MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - if (is_merge) + if (account_for_deleted) res += part->getExistingBytesOnDisk(); else res += part->getBytesOnDisk(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 731c5e1d176..669ee040af3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted = false); private: /** Select all parts belonging to the same partition. From 10c7ea7a29d8426fcf4d0ca09c778cdd3e56fbbd Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 12 Mar 2024 14:32:07 +0800 Subject: [PATCH 327/985] [debug] fast test again --- src/Storages/StorageMergeTree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 055a48ad998..928ee094583 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2083,7 +2083,6 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con MergeTreePartInfo dst_part_info(partition_id, temp_index, temp_index, src_part->info.level); IDataPartStorage::ClonePartParams clone_params{.txn = local_context->getCurrentTransaction()}; - bool on_same_disk = false; for (const DiskPtr & disk : this->getStoragePolicy()->getDisks()) if (disk->getName() == src_part->getDataPartStorage().getDiskName()) From 05969a39f390445c8d0df43b7077e0eb81db3538 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Tue, 12 Mar 2024 14:45:25 +0800 Subject: [PATCH 328/985] resolve conflicts --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 +++-- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c099512d636..5fede923252 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1342,7 +1342,7 @@ UInt64 IMergeTreeDataPart::readExistingRowsCount() return rows_count; NamesAndTypesList cols; - cols.push_back(LightweightDeleteDescription::FILTER_COLUMN); + cols.emplace_back(RowExistsColumn::name, RowExistsColumn::type); StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); @@ -1351,7 +1351,8 @@ UInt64 IMergeTreeDataPart::readExistingRowsCount() cols, storage_snapshot_ptr, MarkRanges{MarkRange(0, total_mark)}, - nullptr, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, storage.getContext()->getMarkCache().get(), std::make_shared(), MergeTreeReaderSettings{}, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3d31d2f05db..4d1e60f450e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -62,7 +62,7 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis static UInt64 getExistingRowsCount(const Block & block) { - auto column = 
block.getByName(LightweightDeleteDescription::FILTER_COLUMN.name).column; + auto column = block.getByName(RowExistsColumn::name).column; const ColumnUInt8 * row_exists_col = typeid_cast(column.get()); if (!row_exists_col) @@ -2225,7 +2225,7 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->updated_header.has(LightweightDeleteDescription::FILTER_COLUMN.name)) + if (ctx->updated_header.has(RowExistsColumn::name)) { /// This mutation contains lightweight delete, reset existing_rows_count of new data part to 0 /// It will be updated while writing _row_exists column From c1b94b2170acbf72d066928fd168c18dc571d505 Mon Sep 17 00:00:00 2001 From: unashi Date: Tue, 12 Mar 2024 15:33:43 +0800 Subject: [PATCH 329/985] [debug] fast test again again --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 97968f1b9c1..7fc504d71f1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7174,7 +7174,7 @@ std::pair MergeTreeData::cloneAn } } if (!copy_successful) - LOG_ERROR(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail"); + LOG_ERROR(&Poco::Logger::get("MergeTreeData"), "Hard link fail, clone fail."); } From 5f1991fbef2f959f1d55c62194d948814d199fa9 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 12 Mar 2024 15:53:28 +0800 Subject: [PATCH 330/985] too big translation unit in Aggregator --- src/Common/HashTable/FixedHashMap.h | 3 + .../HashTable/TwoLevelStringHashTable.h | 1 + src/Interpreters/AggregatedData.h | 142 +++ src/Interpreters/AggregatedDataVariants.cpp | 255 ++++ src/Interpreters/AggregatedDataVariants.h | 320 +++++ src/Interpreters/AggregationMethod.cpp | 215 ++++ src/Interpreters/AggregationMethod.h | 320 +++++ src/Interpreters/Aggregator.cpp | 512 ++++---- src/Interpreters/Aggregator.h | 1035 +---------------- 9 files changed, 1541 insertions(+), 1262 deletions(-) create mode 100644 src/Interpreters/AggregatedData.h create mode 100644 src/Interpreters/AggregatedDataVariants.cpp create mode 100644 src/Interpreters/AggregatedDataVariants.h create mode 100644 src/Interpreters/AggregationMethod.cpp create mode 100644 src/Interpreters/AggregationMethod.h diff --git a/src/Common/HashTable/FixedHashMap.h b/src/Common/HashTable/FixedHashMap.h index e835a6fba94..537f37a9e6c 100644 --- a/src/Common/HashTable/FixedHashMap.h +++ b/src/Common/HashTable/FixedHashMap.h @@ -109,6 +109,9 @@ public: using Base::Base; + FixedHashMap() = default; + FixedHashMap(size_t ) {} /// NOLINT + template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 54c208c5b60..1ce6b3d02e3 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -38,6 +38,7 @@ public: Impl impls[NUM_BUCKETS]; TwoLevelStringHashTable() = default; + TwoLevelStringHashTable(size_t ) {} /// NOLINT template explicit TwoLevelStringHashTable(const Source & src) diff --git a/src/Interpreters/AggregatedData.h b/src/Interpreters/AggregatedData.h new file mode 100644 index 00000000000..6cd6b190801 --- /dev/null +++ b/src/Interpreters/AggregatedData.h @@ -0,0 +1,142 @@ +#pragma once +#include + 
+#include +#include +#include +#include +namespace DB +{ +/** Different data structures that can be used for aggregation + * For efficiency, the aggregation data itself is put into the pool. + * Data and pool ownership (states of aggregate functions) + * is acquired later - in `convertToBlocks` function, by the ColumnAggregateFunction object. + * + * Most data structures exist in two versions: normal and two-level (TwoLevel). + * A two-level hash table works a little slower with a small number of different keys, + * but with a large number of different keys scales better, because it allows + * parallelize some operations (merging, post-processing) in a natural way. + * + * To ensure efficient work over a wide range of conditions, + * first single-level hash tables are used, + * and when the number of different keys is large enough, + * they are converted to two-level ones. + * + * PS. There are many different approaches to the effective implementation of parallel and distributed aggregation, + * best suited for different cases, and this approach is just one of them, chosen for a combination of reasons. + */ + +using AggregatedDataWithoutKey = AggregateDataPtr; + +using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize; +using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap; + +using AggregatedDataWithUInt32Key = HashMap>; +using AggregatedDataWithUInt64Key = HashMap>; + +using AggregatedDataWithShortStringKey = StringHashMap; + +using AggregatedDataWithStringKey = HashMapWithSavedHash; + +using AggregatedDataWithKeys128 = HashMap; +using AggregatedDataWithKeys256 = HashMap; + +using AggregatedDataWithUInt32KeyTwoLevel = TwoLevelHashMap>; +using AggregatedDataWithUInt64KeyTwoLevel = TwoLevelHashMap>; + +using AggregatedDataWithShortStringKeyTwoLevel = TwoLevelStringHashMap; + +using AggregatedDataWithStringKeyTwoLevel = TwoLevelHashMapWithSavedHash; + +using AggregatedDataWithKeys128TwoLevel = TwoLevelHashMap; +using AggregatedDataWithKeys256TwoLevel = TwoLevelHashMap; + +/** Variants with better hash function, using more than 32 bits for hash. + * Using for merging phase of external aggregation, where number of keys may be far greater than 4 billion, + * but we keep in memory and merge only sub-partition of them simultaneously. + * TODO We need to switch for better hash function not only for external aggregation, + * but also for huge aggregation results on machines with terabytes of RAM. + */ + +using AggregatedDataWithUInt64KeyHash64 = HashMap>; +using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash; +using AggregatedDataWithKeys128Hash64 = HashMap; +using AggregatedDataWithKeys256Hash64 = HashMap; + +template +struct AggregationDataWithNullKey : public Base +{ + using Base::Base; + + bool & hasNullKeyData() { return has_null_key_data; } + AggregateDataPtr & getNullKeyData() { return null_key_data; } + bool hasNullKeyData() const { return has_null_key_data; } + const AggregateDataPtr & getNullKeyData() const { return null_key_data; } + size_t size() const { return Base::size() + (has_null_key_data ? 
1 : 0); } + bool empty() const { return Base::empty() && !has_null_key_data; } + void clear() + { + Base::clear(); + has_null_key_data = false; + } + void clearAndShrink() + { + Base::clearAndShrink(); + has_null_key_data = false; + } + +private: + bool has_null_key_data = false; + AggregateDataPtr null_key_data = nullptr; +}; + +template +struct AggregationDataWithNullKeyTwoLevel : public Base +{ + using Base::Base; + using Base::impls; + + AggregationDataWithNullKeyTwoLevel() = default; + + template + explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other) + { + impls[0].hasNullKeyData() = other.hasNullKeyData(); + impls[0].getNullKeyData() = other.getNullKeyData(); + } + + bool & hasNullKeyData() { return impls[0].hasNullKeyData(); } + AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); } + bool hasNullKeyData() const { return impls[0].hasNullKeyData(); } + const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); } +}; + +template +using HashTableWithNullKey = AggregationDataWithNullKey>; +template +using StringHashTableWithNullKey = AggregationDataWithNullKey>; + +using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey; +using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey; +using AggregatedDataWithNullableUInt32Key = AggregationDataWithNullKey; + + +using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey; +using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey; +using AggregatedDataWithNullableShortStringKey = AggregationDataWithNullKey; + + +using AggregatedDataWithNullableUInt32KeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMap, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; +using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMap, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + +using AggregatedDataWithNullableShortStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelStringHashMap>; + +using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMapWithSavedHash, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; +} diff --git a/src/Interpreters/AggregatedDataVariants.cpp b/src/Interpreters/AggregatedDataVariants.cpp new file mode 100644 index 00000000000..0c86c58bd3e --- /dev/null +++ b/src/Interpreters/AggregatedDataVariants.cpp @@ -0,0 +1,255 @@ +#include +#include + +namespace ProfileEvents +{ + extern const Event AggregationPreallocatedElementsInHashTables; +} + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; + extern const int LOGICAL_ERROR; + +} +using ColumnsHashing::HashMethodContext; +using ColumnsHashing::HashMethodContextPtr; +using ColumnsHashing::LastElementCacheStats; + +AggregatedDataVariants::AggregatedDataVariants() : aggregates_pools(1, std::make_shared()), aggregates_pool(aggregates_pools.back().get()) {} + +AggregatedDataVariants::~AggregatedDataVariants() +{ + if (aggregator && !aggregator->all_aggregates_has_trivial_destructor) + { + try + { + aggregator->destroyAllAggregateStates(*this); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } +} + +// The std::is_constructible trait isn't suitable here because some classes have template constructors with semantics different from providing size hints. 
+// Also string hash table variants are not supported due to the fact that both local perf tests and tests in CI showed slowdowns for them. +template +struct HasConstructorOfNumberOfElements : std::false_type +{ +}; + +template +struct HasConstructorOfNumberOfElements> : std::true_type +{ +}; + +template typename ImplTable> +struct HasConstructorOfNumberOfElements> : std::true_type +{ +}; + +template +struct HasConstructorOfNumberOfElements> : std::true_type +{ +}; + +template +struct HasConstructorOfNumberOfElements> : std::true_type +{ +}; + +template