From d1c85f68e8983a516690dffcfe5820d9dfa2b8a4 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 26 Sep 2022 18:29:15 +0800 Subject: [PATCH 01/47] manually snapshot creation for keeper --- docs/en/operations/clickhouse-keeper.md | 8 +++++++- src/Coordination/FourLetterCommand.cpp | 8 ++++++++ src/Coordination/FourLetterCommand.h | 13 +++++++++++++ src/Coordination/KeeperDispatcher.h | 6 ++++++ src/Coordination/KeeperServer.cpp | 5 +++++ src/Coordination/KeeperServer.h | 2 ++ .../test_keeper_four_word_command/test.py | 15 +++++++++++++++ 7 files changed, 56 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 8bf64bca28f..6597e4e5be0 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -123,7 +123,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`. +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro,csnp`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -306,6 +306,12 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` +- `csnp`: Schedule a snapshot creation task. Return `"Snapshot creation scheduled."` if successfully scheduled or Fail to scheduled snapshot creation.` if failed. + +``` +Snapshot creation scheduled. 
+``` + ## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index c33630a913b..70009703c5a 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -136,6 +136,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr api_version_command = std::make_shared(keeper_dispatcher); factory.registerCommand(api_version_command); + FourLetterCommandPtr create_snapshot_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(create_snapshot_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -472,4 +475,9 @@ String ApiVersionCommand::run() return toString(static_cast(Coordination::current_keeper_api_version)); } +String CreateSnapshotCommand::run() +{ + return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." 
: "Fail to scheduled snapshot creation."; +} + } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 8a98b94b33a..25cc281d5e1 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -327,4 +327,17 @@ struct ApiVersionCommand : public IFourLetterCommand String run() override; ~ApiVersionCommand() override = default; }; + +/// Create snapshot manually +struct CreateSnapshotCommand : public IFourLetterCommand +{ + explicit CreateSnapshotCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "csnp"; } + String run() override; + ~CreateSnapshotCommand() override = default; +}; } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 5e2701299f4..9b52721b951 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -201,6 +201,12 @@ public: { keeper_stats.reset(); } + + /// Create snapshot manually + bool createSnapshot() + { + return server->createSnapshot(); + } }; } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 8186ddd0c00..f03f453aada 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -903,4 +903,9 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const return result; } +bool KeeperServer::createSnapshot() +{ + return raft_instance->create_snapshot(); +} + } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 6873ef2a01e..f969e9ee063 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -130,6 +130,8 @@ public: /// Wait configuration update for action. Used by followers. /// Return true if update was successfully received. 
bool waitConfigurationUpdate(const ConfigUpdateAction & task); + + bool createSnapshot(); }; } diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..0995adb199c 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -634,3 +634,18 @@ def test_cmd_wchp(started_cluster): assert "/test_4lw_normal_node_1" in list_data finally: destroy_zk_client(zk) + + +def test_cmd_csnp(started_cluster): + zk = None + try: + wait_nodes() + clear_znodes() + reset_node_stats() + + zk = get_fake_zk(node1.name, timeout=30.0) + + data = send_4lw_cmd(cmd="csnp") + assert data == "Snapshot creation scheduled." + finally: + destroy_zk_client(zk) From dfb2be3a6732aed0890a82b9d79d12dc1b8ea841 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 26 Sep 2022 18:34:53 +0800 Subject: [PATCH 02/47] fix docs --- docs/en/operations/clickhouse-keeper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 6597e4e5be0..1b8b1e02aa8 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -123,7 +123,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro,csnp`. 
+The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. From b724b7a74a2b88e10ef08a6aacc32b64a38105cf Mon Sep 17 00:00:00 2001 From: lixuchun Date: Wed, 12 Oct 2022 11:57:35 +0800 Subject: [PATCH 03/47] update docs error --- docs/en/engines/database-engines/replicated.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index f0ef1e981fe..43d1ce5ec3f 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -86,7 +86,7 @@ node1 :) SELECT materialize(hostName()) AS host, groupArray(n) FROM r.d GROUP BY ``` text ┌─hosts─┬─groupArray(n)─┐ -│ node1 │ [1,3,5,7,9] │ +│ node3 │ [1,3,5,7,9] │ │ node2 │ [0,2,4,6,8] │ └───────┴───────────────┘ ``` From 39c88c74e84b1109015ad5b80e164bf57799e3ba Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 22 Oct 2022 22:31:17 +0800 Subject: [PATCH 04/47] check whether last manually created snapshot is done --- contrib/NuRaft | 2 +- docs/en/operations/clickhouse-keeper.md | 8 +++++++- src/Coordination/FourLetterCommand.cpp | 7 ++++++- src/Coordination/FourLetterCommand.h | 14 ++++++++++++++ src/Coordination/KeeperDispatcher.h | 6 ++++++ src/Coordination/KeeperServer.cpp | 16 +++++++++++++++- src/Coordination/KeeperServer.h | 6 ++++++ .../test_keeper_four_word_command/test.py | 9 ++++++--- 8 files changed, 61 insertions(+), 7 deletions(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 1be805e7cb2..e4e746a24eb 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc +Subproject commit e4e746a24eb56861a86f3672771e3308d8c40722 diff --git a/docs/en/operations/clickhouse-keeper.md 
b/docs/en/operations/clickhouse-keeper.md index 03eddd4f6ed..66b4685bff5 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,12 +309,18 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return `"Snapshot creation scheduled."` if successfully scheduled or Fail to scheduled snapshot creation.` if failed. +- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled.` if successfully scheduled or `Fail to scheduled snapshot creation.` if failed. ``` Snapshot creation scheduled. ``` +- `snpd`: Whether the last successfully scheduled snapshot creation is done. Return `Yes` if true or `No` if false. + +``` +Yes +``` + ## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration: diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 70009703c5a..3d1077ea84c 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -477,7 +477,12 @@ String ApiVersionCommand::run() String CreateSnapshotCommand::run() { - return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation."; + return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation task."; +} + +String CheckSnapshotDoneCommand::run() +{ + return keeper_dispatcher.snapshotDone() ? "Snapshot creation done." 
: "Fail to scheduled snapshot creation task."; } } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 5001a750d66..28f1d7f153f 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -340,4 +340,18 @@ struct CreateSnapshotCommand : public IFourLetterCommand String run() override; ~CreateSnapshotCommand() override = default; }; + +/// Check whether last manual snapshot done +struct CheckSnapshotDoneCommand : public IFourLetterCommand +{ + explicit CheckSnapshotDoneCommand(KeeperDispatcher & keeper_dispatcher_) + : IFourLetterCommand(keeper_dispatcher_) + { + } + + String name() override { return "snpd"; } + String run() override; + ~CheckSnapshotDoneCommand() override = default; +}; + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 79212ea3040..48681957c13 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -209,6 +209,12 @@ public: { return server->createSnapshot(); } + + /// Whether the last manually created snapshot is done + bool snapshotDone() + { + return server->snapshotDone(); + } }; } diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index e0186927b54..87ebea0b4ab 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -114,6 +114,7 @@ KeeperServer::KeeperServer( , is_recovering(config.getBool("keeper_server.force_recovery", false)) , keeper_context{std::make_shared()} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) + , last_manual_snapshot_log_idx(0) { if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); @@ -908,7 +909,20 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const bool KeeperServer::createSnapshot() { - return raft_instance->create_snapshot(); + std::lock_guard lock(snapshot_mutex); + if 
(raft_instance->create_snapshot()) + { + last_manual_snapshot_log_idx = raft_instance->get_last_snapshot_idx(); + LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", last_manual_snapshot_log_idx); + return true; + } + return false; +} + +bool KeeperServer::snapshotDone() +{ + std::lock_guard lock(snapshot_mutex); + return last_manual_snapshot_log_idx != 0 && last_manual_snapshot_log_idx == raft_instance->get_last_snapshot_idx(); } } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index ec832199387..11e3b75d127 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -66,6 +66,10 @@ private: const bool create_snapshot_on_exit; + /// Used to check whether the previous manually created snapshot complete. + uint64_t last_manual_snapshot_log_idx; + std::mutex snapshot_mutex; + public: KeeperServer( const KeeperConfigurationAndSettingsPtr & settings_, @@ -133,6 +137,8 @@ public: bool waitConfigurationUpdate(const ConfigUpdateAction & task); bool createSnapshot(); + + bool snapshotDone(); }; } diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 2b2343757bb..bfe0b2a96e4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -598,7 +598,7 @@ def test_cmd_wchp(started_cluster): destroy_zk_client(zk) -def test_cmd_csnp(started_cluster): +def test_cmd_snapshot(started_cluster): zk = None try: wait_nodes() @@ -607,7 +607,10 @@ def test_cmd_csnp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) - data = send_4lw_cmd(cmd="csnp") - assert data == "Snapshot creation scheduled." + create = send_4lw_cmd(cmd="csnp") + assert create == "Snapshot creation scheduled." 
+ + check = send_4lw_cmd(cmd="snpd") + assert (check == "Yes" or check == "No") finally: destroy_zk_client(zk) From 42e391a0191ef046a499fab5c245e5714475cbe7 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 22 Oct 2022 22:47:03 +0800 Subject: [PATCH 05/47] fix test --- tests/integration/test_keeper_four_word_command/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index bfe0b2a96e4..4949d6f70de 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -611,6 +611,6 @@ def test_cmd_snapshot(started_cluster): assert create == "Snapshot creation scheduled." check = send_4lw_cmd(cmd="snpd") - assert (check == "Yes" or check == "No") + assert check == "Yes" or check == "No" finally: destroy_zk_client(zk) From 2f30c817bfb51ae47bb7dde8c15f3f4999d0d924 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 24 Oct 2022 17:23:47 +0800 Subject: [PATCH 06/47] little fix --- src/Coordination/KeeperServer.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 87ebea0b4ab..042ab35d709 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -910,10 +910,11 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const bool KeeperServer::createSnapshot() { std::lock_guard lock(snapshot_mutex); - if (raft_instance->create_snapshot()) + uint64_t log_idx = raft_instance->create_snapshot(); + if (log_idx != 0) { - last_manual_snapshot_log_idx = raft_instance->get_last_snapshot_idx(); - LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", last_manual_snapshot_log_idx); + last_manual_snapshot_log_idx = log_idx; + LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", log_idx); return 
true; } return false; From b5d1c4e6574fbc4916056a3c78ea87aa7c74a3f6 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Mon, 24 Oct 2022 20:08:58 +0800 Subject: [PATCH 07/47] replace snpd with lgif --- docs/en/operations/clickhouse-keeper.md | 13 +++++++++---- src/Coordination/FourLetterCommand.cpp | 19 ++++++++++++++++--- src/Coordination/FourLetterCommand.h | 17 ++++++++++++----- src/Coordination/Keeper4LWInfo.h | 22 ++++++++++++++++++++++ src/Coordination/KeeperDispatcher.h | 8 ++++---- src/Coordination/KeeperServer.cpp | 24 +++++++++++++----------- src/Coordination/KeeperServer.h | 8 ++------ 7 files changed, 78 insertions(+), 33 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 66b4685bff5..2ab76e1a1ea 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,16 +309,21 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled.` if successfully scheduled or `Fail to scheduled snapshot creation.` if failed. +- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled with last committed log index xxx.` if successfully scheduled or `Fail to scheduled snapshot creation task.` if failed. ``` -Snapshot creation scheduled. +Snapshot creation scheduled with last committed log index 100. ``` -- `snpd`: Whether the last successfully scheduled snapshot creation is done. Return `Yes` if true or `No` if false. +- `lgif`: Keeper log information. `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot. 
``` -Yes +last_log_idx : 101 +last_log_term : 1 +last_committed_log_idx : 100 +leader_committed_log_idx : 101 +target_committed_log_idx : 101 +last_snapshot_idx : 50 ``` ## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 3d1077ea84c..c5841ce3404 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -139,6 +139,9 @@ void FourLetterCommandFactory::registerCommands(KeeperDispatcher & keeper_dispat FourLetterCommandPtr create_snapshot_command = std::make_shared(keeper_dispatcher); factory.registerCommand(create_snapshot_command); + FourLetterCommandPtr log_info_command = std::make_shared(keeper_dispatcher); + factory.registerCommand(log_info_command); + factory.initializeAllowList(keeper_dispatcher); factory.setInitialize(true); } @@ -477,12 +480,22 @@ String ApiVersionCommand::run() String CreateSnapshotCommand::run() { - return keeper_dispatcher.createSnapshot() ? "Snapshot creation scheduled." : "Fail to scheduled snapshot creation task."; + auto log_index = keeper_dispatcher.createSnapshot(); + return log_index > 0 ? "Snapshot creation scheduled with last committed log index " + std::to_string(log_index) + "." + : "Fail to scheduled snapshot creation task."; } -String CheckSnapshotDoneCommand::run() +String LogInfoCommand::run() { - return keeper_dispatcher.snapshotDone() ? "Snapshot creation done." 
: "Fail to scheduled snapshot creation task."; + KeeperLogInfo log_info = keeper_dispatcher.getKeeperLogInfo(); + StringBuffer ret; + print(ret, "last_log_idx", log_info.last_log_idx); + print(ret, "last_log_term", log_info.last_log_term); + print(ret, "last_committed_log_idx", log_info.last_committed_log_idx); + print(ret, "leader_committed_log_idx", log_info.leader_committed_log_idx); + print(ret, "target_committed_log_idx", log_info.target_committed_log_idx); + print(ret, "last_snapshot_idx", log_info.last_snapshot_idx); + return ret.str(); } } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 28f1d7f153f..99005bab987 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -341,17 +341,24 @@ struct CreateSnapshotCommand : public IFourLetterCommand ~CreateSnapshotCommand() override = default; }; -/// Check whether last manual snapshot done -struct CheckSnapshotDoneCommand : public IFourLetterCommand +/** Raft log information: + * last_log_idx : 101 + * last_log_term : 1 + * last_committed_idx : 100 + * leader_committed_log_idx : 101 + * target_committed_log_idx : 101 + * last_snapshot_idx : 50 + */ +struct LogInfoCommand : public IFourLetterCommand { - explicit CheckSnapshotDoneCommand(KeeperDispatcher & keeper_dispatcher_) + explicit LogInfoCommand(KeeperDispatcher & keeper_dispatcher_) : IFourLetterCommand(keeper_dispatcher_) { } - String name() override { return "snpd"; } + String name() override { return "lgif"; } String run() override; - ~CheckSnapshotDoneCommand() override = default; + ~LogInfoCommand() override = default; }; } diff --git a/src/Coordination/Keeper4LWInfo.h b/src/Coordination/Keeper4LWInfo.h index 7d90152611e..dbddadaefbf 100644 --- a/src/Coordination/Keeper4LWInfo.h +++ b/src/Coordination/Keeper4LWInfo.h @@ -47,4 +47,26 @@ struct Keeper4LWInfo } }; +/// Keeper log information for 4lw commands +struct KeeperLogInfo +{ + /// My last log index in log store. 
+ uint64_t last_log_idx; + + /// My last log term. + uint64_t last_log_term; + + /// My last committed log index in state machine. + uint64_t last_committed_log_idx; + + /// Leader's committed log index from my perspective. + uint64_t leader_committed_log_idx; + + /// Target log index should be committed to. + uint64_t target_committed_log_idx; + + /// The largest committed log index in last snapshot. + uint64_t last_snapshot_idx; +}; + } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 48681957c13..0126bf8a1e5 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -204,16 +204,16 @@ public: keeper_stats.reset(); } - /// Create snapshot manually - bool createSnapshot() + /// Create snapshot manually, return the last committed log index in the snapshot + uint64_t createSnapshot() { return server->createSnapshot(); } /// Whether the last manually created snapshot is done - bool snapshotDone() + KeeperLogInfo getKeeperLogInfo() { - return server->snapshotDone(); + return server->getKeeperLogInfo(); } }; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 042ab35d709..38070938fc5 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -907,23 +907,25 @@ Keeper4LWInfo KeeperServer::getPartiallyFilled4LWInfo() const return result; } -bool KeeperServer::createSnapshot() +uint64_t KeeperServer::createSnapshot() { - std::lock_guard lock(snapshot_mutex); uint64_t log_idx = raft_instance->create_snapshot(); if (log_idx != 0) - { - last_manual_snapshot_log_idx = log_idx; - LOG_INFO(log, "Successfully schedule a keeper snapshot creation task at log index {}", log_idx); - return true; - } - return false; + LOG_INFO(log, "Snapshot creation scheduled with last committed log index {}.", log_idx); + else + LOG_WARNING(log, "Fail to scheduled snapshot creation task."); + return log_idx; } -bool KeeperServer::snapshotDone() 
+KeeperLogInfo KeeperServer::getKeeperLogInfo() { - std::lock_guard lock(snapshot_mutex); - return last_manual_snapshot_log_idx != 0 && last_manual_snapshot_log_idx == raft_instance->get_last_snapshot_idx(); + KeeperLogInfo log_info; + log_info.last_log_idx = raft_instance->get_last_log_idx(); + log_info.last_log_term = raft_instance->get_last_log_term(); + log_info.leader_committed_log_idx = raft_instance->get_leader_committed_log_idx(); + log_info.target_committed_log_idx = raft_instance->get_target_committed_log_idx(); + log_info.last_snapshot_idx = raft_instance->get_last_snapshot_idx(); + return log_info; } } diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index 11e3b75d127..192c8f470b1 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -66,10 +66,6 @@ private: const bool create_snapshot_on_exit; - /// Used to check whether the previous manually created snapshot complete. - uint64_t last_manual_snapshot_log_idx; - std::mutex snapshot_mutex; - public: KeeperServer( const KeeperConfigurationAndSettingsPtr & settings_, @@ -136,9 +132,9 @@ public: /// Return true if update was successfully received. 
bool waitConfigurationUpdate(const ConfigUpdateAction & task); - bool createSnapshot(); + uint64_t createSnapshot(); - bool snapshotDone(); + KeeperLogInfo getKeeperLogInfo(); }; } From 9a36a509fe1272b53698eb0707249e3593fc3088 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 25 Oct 2022 17:15:49 +0800 Subject: [PATCH 08/47] fix test --- docs/en/operations/clickhouse-keeper.md | 20 +++++----- src/Coordination/FourLetterCommand.cpp | 25 ++++++++---- src/Coordination/FourLetterCommand.h | 15 ++++--- src/Coordination/Keeper4LWInfo.h | 6 +++ src/Coordination/KeeperServer.cpp | 4 +- .../test_keeper_four_word_command/test.py | 39 ++++++++++++++++--- 6 files changed, 79 insertions(+), 30 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 2ab76e1a1ea..8eee97ed275 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,21 +309,23 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return `Snapshot creation scheduled with last committed log index xxx.` if successfully scheduled or `Fail to scheduled snapshot creation task.` if failed. +- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if successfully scheduled or `Fail to scheduled snapshot creation task.` if failed. ``` -Snapshot creation scheduled with last committed log index 100. +100 ``` -- `lgif`: Keeper log information. `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot. +- `lgif`: Keeper log information. 
`first_log_idx` : my first log index in log store; `first_log_term` : my first log term; `last_log_idx` : my last log index in log store; `last_log_term` : my last log term; `last_committed_log_idx` : my last committed log index in state machine; `leader_committed_log_idx` : leader's committed log index from my perspective; `target_committed_log_idx` : target log index should be committed to; `last_snapshot_idx` : the largest committed log index in last snapshot. ``` -last_log_idx : 101 -last_log_term : 1 -last_committed_log_idx : 100 -leader_committed_log_idx : 101 -target_committed_log_idx : 101 -last_snapshot_idx : 50 +first_log_idx 1 +first_log_term 1 +last_log_idx 101 +last_log_term 1 +last_committed_log_idx 100 +leader_committed_log_idx 101 +target_committed_log_idx 101 +last_snapshot_idx 50 ``` ## [experimental] Migration from ZooKeeper {#migration-from-zookeeper} diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index c5841ce3404..402270640d2 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -481,20 +481,29 @@ String ApiVersionCommand::run() String CreateSnapshotCommand::run() { auto log_index = keeper_dispatcher.createSnapshot(); - return log_index > 0 ? "Snapshot creation scheduled with last committed log index " + std::to_string(log_index) + "." - : "Fail to scheduled snapshot creation task."; + return log_index > 0 ? 
std::to_string(log_index) : "Fail to scheduled snapshot creation task."; } String LogInfoCommand::run() { KeeperLogInfo log_info = keeper_dispatcher.getKeeperLogInfo(); StringBuffer ret; - print(ret, "last_log_idx", log_info.last_log_idx); - print(ret, "last_log_term", log_info.last_log_term); - print(ret, "last_committed_log_idx", log_info.last_committed_log_idx); - print(ret, "leader_committed_log_idx", log_info.leader_committed_log_idx); - print(ret, "target_committed_log_idx", log_info.target_committed_log_idx); - print(ret, "last_snapshot_idx", log_info.last_snapshot_idx); + + auto append = [&ret] (String key, uint64_t value) -> void + { + writeText(key, ret); + writeText('\t', ret); + writeText(std::to_string(value), ret); + writeText('\n', ret); + }; + append("first_log_idx", log_info.first_log_idx); + append("first_log_term", log_info.first_log_term); + append("last_log_idx", log_info.last_log_idx); + append("last_log_term", log_info.last_log_term); + append("last_committed_log_idx", log_info.last_committed_log_idx); + append("leader_committed_log_idx", log_info.leader_committed_log_idx); + append("target_committed_log_idx", log_info.target_committed_log_idx); + append("last_snapshot_idx", log_info.last_snapshot_idx); return ret.str(); } diff --git a/src/Coordination/FourLetterCommand.h b/src/Coordination/FourLetterCommand.h index 99005bab987..a8801474bb0 100644 --- a/src/Coordination/FourLetterCommand.h +++ b/src/Coordination/FourLetterCommand.h @@ -17,6 +17,7 @@ using FourLetterCommandPtr = std::shared_ptr; /// Just like zookeeper Four Letter Words commands, CH Keeper responds to a small set of commands. /// Each command is composed of four letters, these commands are useful to monitor and issue system problems. /// The feature is based on Zookeeper 3.5.9, details is in https://zookeeper.apache.org/doc/r3.5.9/zookeeperAdmin.html#sc_zkCommands. +/// Also we add some additional commands such as csnp, lgif etc. 
struct IFourLetterCommand { public: @@ -342,12 +343,14 @@ struct CreateSnapshotCommand : public IFourLetterCommand }; /** Raft log information: - * last_log_idx : 101 - * last_log_term : 1 - * last_committed_idx : 100 - * leader_committed_log_idx : 101 - * target_committed_log_idx : 101 - * last_snapshot_idx : 50 + * first_log_idx 1 + * first_log_term 1 + * last_log_idx 101 + * last_log_term 1 + * last_committed_idx 100 + * leader_committed_log_idx 101 + * target_committed_log_idx 101 + * last_snapshot_idx 50 */ struct LogInfoCommand : public IFourLetterCommand { diff --git a/src/Coordination/Keeper4LWInfo.h b/src/Coordination/Keeper4LWInfo.h index dbddadaefbf..105478457cc 100644 --- a/src/Coordination/Keeper4LWInfo.h +++ b/src/Coordination/Keeper4LWInfo.h @@ -50,6 +50,12 @@ struct Keeper4LWInfo /// Keeper log information for 4lw commands struct KeeperLogInfo { + /// My first log index in log store. + uint64_t first_log_idx; + + /// My first log term. + uint64_t first_log_term; + /// My last log index in log store. 
uint64_t last_log_idx; diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 38070938fc5..bea69ea0ba8 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -114,7 +114,6 @@ KeeperServer::KeeperServer( , is_recovering(config.getBool("keeper_server.force_recovery", false)) , keeper_context{std::make_shared()} , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) - , last_manual_snapshot_log_idx(0) { if (coordination_settings->quorum_reads) LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); @@ -920,8 +919,11 @@ uint64_t KeeperServer::createSnapshot() KeeperLogInfo KeeperServer::getKeeperLogInfo() { KeeperLogInfo log_info; + log_info.first_log_idx = state_manager->load_log_store()->start_index(); + log_info.first_log_term = state_manager->load_log_store()->term_at(log_info.first_log_idx); log_info.last_log_idx = raft_instance->get_last_log_idx(); log_info.last_log_term = raft_instance->get_last_log_term(); + log_info.last_committed_log_idx = raft_instance->get_committed_log_idx(); log_info.leader_committed_log_idx = raft_instance->get_leader_committed_log_idx(); log_info.target_committed_log_idx = raft_instance->get_target_committed_log_idx(); log_info.last_snapshot_idx = raft_instance->get_last_snapshot_idx(); diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index 4949d6f70de..4559904f8b7 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -598,19 +598,46 @@ def test_cmd_wchp(started_cluster): destroy_zk_client(zk) -def test_cmd_snapshot(started_cluster): +def test_cmd_csnp(started_cluster): + zk = None + try: + wait_nodes() + zk = get_fake_zk(node1.name, timeout=30.0) + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="csnp") + try: + int(data) + assert True + except ValueError: + 
assert False + finally: + destroy_zk_client(zk) + + +def test_cmd_lgif(started_cluster): zk = None try: wait_nodes() clear_znodes() - reset_node_stats() zk = get_fake_zk(node1.name, timeout=30.0) + do_some_action(zk, create_cnt=100) - create = send_4lw_cmd(cmd="csnp") - assert create == "Snapshot creation scheduled." + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="lgif") + print(data) + reader = csv.reader(data.split("\n"), delimiter="\t") + result = {} - check = send_4lw_cmd(cmd="snpd") - assert check == "Yes" or check == "No" + for row in reader: + if len(row) != 0: + result[row[0]] = row[1] + + assert int(result["first_log_idx"]) == 1 + assert int(result["first_log_term"]) == 1 + assert int(result["last_log_idx"]) >= 1 + assert int(result["last_log_term"]) == 1 + assert int(result["last_committed_log_idx"]) >= 1 + assert int(result["leader_committed_log_idx"]) >= 1 + assert int(result["target_committed_log_idx"]) >= 1 + assert int(result["last_snapshot_idx"]) >= 1 finally: destroy_zk_client(zk) From c7a0ebeb05ba81f4259b2c5910ac88c10520cafc Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 25 Oct 2022 17:46:24 +0800 Subject: [PATCH 09/47] little fix --- docs/en/operations/clickhouse-keeper.md | 2 +- src/Coordination/KeeperDispatcher.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 8eee97ed275..269e18023df 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,7 +309,7 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if successfully scheduled or `Fail to scheduled snapshot creation task.` if failed. +- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Fail to scheduled snapshot creation task.` if failed. 
``` 100 diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 0126bf8a1e5..84345ca1ff5 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -210,7 +210,7 @@ public: return server->createSnapshot(); } - /// Whether the last manually created snapshot is done + /// Get Raft information KeeperLogInfo getKeeperLogInfo() { return server->getKeeperLogInfo(); From 611c2e2bd75614e8eeb9d10933d76d6c5cd9f89b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 25 Oct 2022 13:34:34 +0000 Subject: [PATCH 10/47] Support for optimizing old parts for entire partition only --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 27 ++++++++++++++++++- src/Storages/MergeTree/MergeTreeSettings.h | 1 + .../test.py | 24 ++++++++++++----- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index b0ef1522685..27000796343 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -214,6 +214,14 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Previous part only in boundaries of partition frame const MergeTreeData::DataPartPtr * prev_part = nullptr; + /// collect min_age for each partition while iterating parts + struct PartitionInfo + { + time_t min_age{std::numeric_limits::max()}; + }; + + std::unordered_map partitions_info; + size_t parts_selected_precondition = 0; for (const MergeTreeData::DataPartPtr & part : data_parts) { @@ -277,6 +285,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( part_info.compression_codec_desc = part->default_codec->getFullCodecDesc(); part_info.shall_participate_in_merges = has_volumes_with_disabled_merges ? 
part->shallParticipateInMerges(storage_policy) : true; + auto & partition_info = partitions_info[partition_id]; + partition_info.min_age = std::min(partition_info.min_age, part_info.age); + ++parts_selected_precondition; parts_ranges.back().emplace_back(part_info); @@ -333,7 +344,8 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( SimpleMergeSelector::Settings merge_settings; /// Override value from table settings merge_settings.max_parts_to_merge_at_once = data_settings->max_parts_to_merge_at_once; - merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds; + if (!data_settings->min_age_to_force_merge_on_partition_only) + merge_settings.min_age_to_force_merge = data_settings->min_age_to_force_merge_seconds; if (aggressive) merge_settings.base = 1; @@ -347,6 +359,19 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (parts_to_merge.empty()) { + if (data_settings->min_age_to_force_merge_on_partition_only && data_settings->min_age_to_force_merge_seconds) + { + auto best_partition_it = std::max_element( + partitions_info.begin(), + partitions_info.end(), + [](const auto & e1, const auto & e2) { return e1.second.min_age > e2.second.min_age; }); + + if (best_partition_it != partitions_info.end() + && static_cast(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds) + return selectAllPartsToMergeWithinPartition( + future_part, can_merge_callback, best_partition_it->first, true, metadata_snapshot, txn, out_disable_reason); + } + if (out_disable_reason) *out_disable_reason = "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 3fecb85f484..844c1ddbfe5 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -63,6 +63,7 @@ struct Settings; M(UInt64, 
merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ M(UInt64, merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds, 1ULL * 3600 * 24 * 30, "Remove old broken detached parts in the background if they remained intouched for a specified by this setting period of time.", 0) \ M(UInt64, min_age_to_force_merge_seconds, 0, "If all parts in a certain range are older than this value, range will be always eligible for merging. Set to 0 to disable.", 0) \ + M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \ M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \ M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ diff --git a/tests/integration/test_merge_tree_optimize_old_parts/test.py b/tests/integration/test_merge_tree_optimize_old_parts/test.py index 7b386eba2c4..87e0ecd8108 100644 --- a/tests/integration/test_merge_tree_optimize_old_parts/test.py +++ b/tests/integration/test_merge_tree_optimize_old_parts/test.py @@ -13,7 +13,7 @@ node = cluster.add_instance( @pytest.fixture(scope="module") -def start_cluster(): +def started_cluster(): try: cluster.start() @@ -42,7 +42,7 @@ def check_expected_part_number(seconds, table_name, expected): assert ok -def test_without_force_merge_old_parts(start_cluster): +def test_without_force_merge_old_parts(started_cluster): node.query( "CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i;" ) @@ -60,13 +60,18 @@ def test_without_force_merge_old_parts(start_cluster): node.query("DROP TABLE test_without_merge;") -def test_force_merge_old_parts(start_cluster): +@pytest.mark.parametrize("partition_only", ["True", "False"]) +def test_force_merge_old_parts(started_cluster, partition_only): 
node.query( - "CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;" + "CREATE TABLE test_with_merge (i Int64) " + "ENGINE = MergeTree " + "ORDER BY i " + f"SETTINGS min_age_to_force_merge_seconds=5, min_age_to_force_merge_on_partition_only={partition_only};" ) node.query("INSERT INTO test_with_merge SELECT 1") node.query("INSERT INTO test_with_merge SELECT 2") node.query("INSERT INTO test_with_merge SELECT 3") + assert get_part_number("test_with_merge") == TSV("""3\n""") expected = TSV("""1\n""") check_expected_part_number(10, "test_with_merge", expected) @@ -74,15 +79,20 @@ def test_force_merge_old_parts(start_cluster): node.query("DROP TABLE test_with_merge;") -def test_force_merge_old_parts_replicated_merge_tree(start_cluster): +@pytest.mark.parametrize("partition_only", ["True", "False"]) +def test_force_merge_old_parts_replicated_merge_tree(started_cluster, partition_only): node.query( - "CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') ORDER BY i SETTINGS min_age_to_force_merge_seconds=5;" + "CREATE TABLE test_replicated (i Int64) " + "ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') " + "ORDER BY i " + f"SETTINGS min_age_to_force_merge_seconds=5, min_age_to_force_merge_on_partition_only={partition_only};" ) node.query("INSERT INTO test_replicated SELECT 1") node.query("INSERT INTO test_replicated SELECT 2") node.query("INSERT INTO test_replicated SELECT 3") + assert get_part_number("test_replicated") == TSV("""3\n""") expected = TSV("""1\n""") check_expected_part_number(10, "test_replicated", expected) - node.query("DROP TABLE test_replicated;") + node.query("DROP TABLE test_replicated SYNC;") From 2254bef74a105ec91389d1c84a6805119f6b4fca Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 25 Oct 2022 12:07:07 +0800 Subject: [PATCH 11/47] implement function ascii --- .../functions/string-functions.md | 10 +++ 
src/Functions/ascii.cpp | 74 +++++++++++++++++++ .../queries/0_stateless/02353_ascii.reference | 2 + tests/queries/0_stateless/02353_ascii.sql | 2 + 4 files changed, 88 insertions(+) create mode 100644 src/Functions/ascii.cpp create mode 100644 tests/queries/0_stateless/02353_ascii.reference create mode 100644 tests/queries/0_stateless/02353_ascii.sql diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a8ba4843279..982ba05f494 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1150,3 +1150,13 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## ascii(s) {#ascii} + +Returns the ASCII code point of the first character of str. The result type is Int32. + +If s is empty, the result is 0. If the first character is not an ASCII character or part of the Latin-1 Supplement range of UTF-16, the result is undefined. 
+ + + + diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp new file mode 100644 index 00000000000..a8a6b9f7226 --- /dev/null +++ b/src/Functions/ascii.cpp @@ -0,0 +1,74 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; +} + +struct AsciiName +{ + static constexpr auto name = "ascii"; +}; + + +struct AsciiImpl +{ + static constexpr auto is_fixed_to_constant = false; + using ReturnType = Int32; + + + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } + } + + [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) + { + throw Exception("vectorFixedToConstant not implemented for function " + std::string(AsciiName::name), ErrorCodes::NOT_IMPLEMENTED); + } + + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + { + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, i * n, n); + } + } + + [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + static Int32 doAscii(const ColumnString::Chars & buf, size_t offset, size_t size) { return size ? 
static_cast(buf[offset]) : 0; } +}; + +using FunctionAscii = FunctionStringOrArrayToT; + +REGISTER_FUNCTION(Ascii) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02353_ascii.reference b/tests/queries/0_stateless/02353_ascii.reference new file mode 100644 index 00000000000..d44c5c7d87e --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.reference @@ -0,0 +1,2 @@ +50 +0 diff --git a/tests/queries/0_stateless/02353_ascii.sql b/tests/queries/0_stateless/02353_ascii.sql new file mode 100644 index 00000000000..c1c5d60c447 --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.sql @@ -0,0 +1,2 @@ +SELECT ascii('234'); +SELECT ascii(''); From add5360a1b17d794760d4f0eac9834c931a10fac Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 25 Oct 2022 12:07:07 +0800 Subject: [PATCH 12/47] implement function ascii --- .../functions/string-functions.md | 10 +++ src/Functions/ascii.cpp | 74 +++++++++++++++++++ .../queries/0_stateless/02353_ascii.reference | 2 + tests/queries/0_stateless/02353_ascii.sql | 2 + 4 files changed, 88 insertions(+) create mode 100644 src/Functions/ascii.cpp create mode 100644 tests/queries/0_stateless/02353_ascii.reference create mode 100644 tests/queries/0_stateless/02353_ascii.sql diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a8ba4843279..982ba05f494 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1150,3 +1150,13 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## ascii(s) {#ascii} + +Returns the ASCII code point of the first character of str. The result type is Int32. + +If s is empty, the result is 0. If the first character is not an ASCII character or part of the Latin-1 Supplement range of UTF-16, the result is undefined. 
+ + + + diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp new file mode 100644 index 00000000000..a8a6b9f7226 --- /dev/null +++ b/src/Functions/ascii.cpp @@ -0,0 +1,74 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; +} + +struct AsciiName +{ + static constexpr auto name = "ascii"; +}; + + +struct AsciiImpl +{ + static constexpr auto is_fixed_to_constant = false; + using ReturnType = Int32; + + + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } + } + + [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) + { + throw Exception("vectorFixedToConstant not implemented for function " + std::string(AsciiName::name), ErrorCodes::NOT_IMPLEMENTED); + } + + static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + { + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + { + res[i] = doAscii(data, i * n, n); + } + } + + [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + static Int32 doAscii(const ColumnString::Chars & buf, size_t offset, size_t size) { return size ? 
static_cast(buf[offset]) : 0; } +}; + +using FunctionAscii = FunctionStringOrArrayToT; + +REGISTER_FUNCTION(Ascii) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02353_ascii.reference b/tests/queries/0_stateless/02353_ascii.reference new file mode 100644 index 00000000000..d44c5c7d87e --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.reference @@ -0,0 +1,2 @@ +50 +0 diff --git a/tests/queries/0_stateless/02353_ascii.sql b/tests/queries/0_stateless/02353_ascii.sql new file mode 100644 index 00000000000..c1c5d60c447 --- /dev/null +++ b/tests/queries/0_stateless/02353_ascii.sql @@ -0,0 +1,2 @@ +SELECT ascii('234'); +SELECT ascii(''); From 97aaebfa1808e9b90258a78de2927c97bae05feb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 26 Oct 2022 10:06:56 +0000 Subject: [PATCH 13/47] Address PR comments --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 5 +- .../__init__.py | 0 .../configs/zookeeper_config.xml | 8 -- .../test.py | 98 ------------------- .../02473_optimize_old_parts.reference | 10 ++ .../0_stateless/02473_optimize_old_parts.sql | 67 +++++++++++++ 6 files changed, 80 insertions(+), 108 deletions(-) delete mode 100644 tests/integration/test_merge_tree_optimize_old_parts/__init__.py delete mode 100644 tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml delete mode 100644 tests/integration/test_merge_tree_optimize_old_parts/test.py create mode 100644 tests/queries/0_stateless/02473_optimize_old_parts.reference create mode 100644 tests/queries/0_stateless/02473_optimize_old_parts.sql diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 27000796343..0f44d1a7da3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -366,8 +366,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( 
partitions_info.end(), [](const auto & e1, const auto & e2) { return e1.second.min_age > e2.second.min_age; }); - if (best_partition_it != partitions_info.end() - && static_cast(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds) + assert(best_partition_it != partitions_info.end()); + + if (static_cast(best_partition_it->second.min_age) >= data_settings->min_age_to_force_merge_seconds) return selectAllPartsToMergeWithinPartition( future_part, can_merge_callback, best_partition_it->first, true, metadata_snapshot, txn, out_disable_reason); } diff --git a/tests/integration/test_merge_tree_optimize_old_parts/__init__.py b/tests/integration/test_merge_tree_optimize_old_parts/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml b/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml deleted file mode 100644 index 18412349228..00000000000 --- a/tests/integration/test_merge_tree_optimize_old_parts/configs/zookeeper_config.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - zoo1 - 2181 - - - diff --git a/tests/integration/test_merge_tree_optimize_old_parts/test.py b/tests/integration/test_merge_tree_optimize_old_parts/test.py deleted file mode 100644 index 87e0ecd8108..00000000000 --- a/tests/integration/test_merge_tree_optimize_old_parts/test.py +++ /dev/null @@ -1,98 +0,0 @@ -import pytest -import time -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=["configs/zookeeper_config.xml"], - with_zookeeper=True, -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - - yield cluster - finally: - cluster.shutdown() - - -def get_part_number(table_name): - return TSV( - node.query( - f"SELECT count(*) FROM 
system.parts where table='{table_name}' and active=1" - ) - ) - - -def check_expected_part_number(seconds, table_name, expected): - ok = False - for i in range(int(seconds) * 2): - result = get_part_number(table_name) - if result == expected: - ok = True - break - else: - time.sleep(1) - assert ok - - -def test_without_force_merge_old_parts(started_cluster): - node.query( - "CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i;" - ) - node.query("INSERT INTO test_without_merge SELECT 1") - node.query("INSERT INTO test_without_merge SELECT 2") - node.query("INSERT INTO test_without_merge SELECT 3") - - expected = TSV("""3\n""") - # verify that the parts don't get merged - for i in range(10): - if get_part_number("test_without_merge") != expected: - assert False - time.sleep(1) - - node.query("DROP TABLE test_without_merge;") - - -@pytest.mark.parametrize("partition_only", ["True", "False"]) -def test_force_merge_old_parts(started_cluster, partition_only): - node.query( - "CREATE TABLE test_with_merge (i Int64) " - "ENGINE = MergeTree " - "ORDER BY i " - f"SETTINGS min_age_to_force_merge_seconds=5, min_age_to_force_merge_on_partition_only={partition_only};" - ) - node.query("INSERT INTO test_with_merge SELECT 1") - node.query("INSERT INTO test_with_merge SELECT 2") - node.query("INSERT INTO test_with_merge SELECT 3") - assert get_part_number("test_with_merge") == TSV("""3\n""") - - expected = TSV("""1\n""") - check_expected_part_number(10, "test_with_merge", expected) - - node.query("DROP TABLE test_with_merge;") - - -@pytest.mark.parametrize("partition_only", ["True", "False"]) -def test_force_merge_old_parts_replicated_merge_tree(started_cluster, partition_only): - node.query( - "CREATE TABLE test_replicated (i Int64) " - "ENGINE = ReplicatedMergeTree('/clickhouse/testing/test', 'node') " - "ORDER BY i " - f"SETTINGS min_age_to_force_merge_seconds=5, min_age_to_force_merge_on_partition_only={partition_only};" - ) - node.query("INSERT INTO 
test_replicated SELECT 1") - node.query("INSERT INTO test_replicated SELECT 2") - node.query("INSERT INTO test_replicated SELECT 3") - assert get_part_number("test_replicated") == TSV("""3\n""") - - expected = TSV("""1\n""") - check_expected_part_number(10, "test_replicated", expected) - - node.query("DROP TABLE test_replicated SYNC;") diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.reference b/tests/queries/0_stateless/02473_optimize_old_parts.reference new file mode 100644 index 00000000000..6767887ba86 --- /dev/null +++ b/tests/queries/0_stateless/02473_optimize_old_parts.reference @@ -0,0 +1,10 @@ +Without merge +6 +With merge any part range +1 +With merge partition only +1 +With merge replicated any part range +1 +With merge replicated partition only +1 diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql new file mode 100644 index 00000000000..545bd58dddc --- /dev/null +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sql @@ -0,0 +1,67 @@ +DROP TABLE IF EXISTS test_without_merge; +DROP TABLE IF EXISTS test_with_merge; +DROP TABLE IF EXISTS test_replicated; + +SELECT 'Without merge'; + +CREATE TABLE test_without_merge (i Int64) ENGINE = MergeTree ORDER BY i; +INSERT INTO test_without_merge SELECT 1; +INSERT INTO test_without_merge SELECT 2; +INSERT INTO test_without_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT count(*) FROM system.parts where table='test_without_merge' and active=1; + +DROP TABLE test_without_merge; + +SELECT 'With merge any part range'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT count(*) FROM system.parts where 
table='test_with_merge' and active=1; + +DROP TABLE test_with_merge; + +SELECT 'With merge partition only'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +INSERT INTO test_with_merge SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT count(*) FROM system.parts where table='test_with_merge' and active=1; + +DROP TABLE test_with_merge; + +SELECT 'With merge replicated any part range'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=false; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT count(*) FROM system.parts where table='test_replicated' and active=1; + +DROP TABLE test_replicated; + +SELECT 'With merge replicated partition only'; + +CREATE TABLE test_replicated (i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test02473_partition_only', 'node') ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +INSERT INTO test_replicated SELECT 1; +INSERT INTO test_replicated SELECT 2; +INSERT INTO test_replicated SELECT 3; + +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT count(*) FROM system.parts where table='test_replicated' and active=1; + +DROP TABLE test_replicated; From 48c37c52e6c2e60423e399f0f457c6fd0bf8dbcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 26 Oct 2022 18:18:04 +0800 Subject: [PATCH 14/47] Update src/Functions/ascii.cpp Co-authored-by: Vladimir C --- src/Functions/ascii.cpp | 11 ++++++++++- 1 file changed, 10 
insertions(+), 1 deletion(-) diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index a8a6b9f7226..592cbe5f1c4 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -68,7 +68,16 @@ using FunctionAscii = FunctionStringOrArrayToT({}, FunctionFactory::CaseInsensitive); + factory.registerFunction( + { + R"( +Returns the ASCII code point of the first character of str. The result type is Int32. + +If s is empty, the result is 0. If the first character is not an ASCII character or part of the Latin-1 Supplement range of UTF-16, the result is undefined) + )", + Documentation::Examples{{"ascii", "SELECT ascii('234')"}}, + Documentation::Categories{"String"} + }, FunctionFactory::CaseInsensitive); } } From c7e5eb756ba0ca32d66ee50db80cfdf3cd765cfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E6=89=AC?= <654010905@qq.com> Date: Wed, 26 Oct 2022 18:18:15 +0800 Subject: [PATCH 15/47] Update src/Functions/ascii.cpp Co-authored-by: Vladimir C --- src/Functions/ascii.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index 592cbe5f1c4..38ba7f0bbac 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -61,7 +61,10 @@ struct AsciiImpl } private: - static Int32 doAscii(const ColumnString::Chars & buf, size_t offset, size_t size) { return size ? static_cast(buf[offset]) : 0; } + static Int32 doAscii(const ColumnString::Chars & buf, size_t offset, size_t size) + { + return size ? 
static_cast(buf[offset]) : 0; + } }; using FunctionAscii = FunctionStringOrArrayToT; From 598b45f1ec2194183ec20418d7b7caf7192429be Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 27 Oct 2022 08:06:39 +0000 Subject: [PATCH 16/47] Add test for partition only and new parts --- .../02473_optimize_old_parts.reference | 2 ++ .../0_stateless/02473_optimize_old_parts.sql | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.reference b/tests/queries/0_stateless/02473_optimize_old_parts.reference index e80812bddcd..9002d73ff27 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.reference +++ b/tests/queries/0_stateless/02473_optimize_old_parts.reference @@ -8,3 +8,5 @@ With merge replicated any part range 1 With merge replicated partition only 1 +With merge partition only and new parts +3 diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql index d673ef22f67..76c1ba73097 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sql +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sql @@ -65,3 +65,21 @@ SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated; + +SELECT 'With merge partition only and new parts'; + +CREATE TABLE test_with_merge (i Int64) ENGINE = MergeTree ORDER BY i +SETTINGS min_age_to_force_merge_seconds=3, min_age_to_force_merge_on_partition_only=true; +SYSTEM STOP MERGES test_with_merge; +-- These three parts will have min_age=6 at the time of merge +INSERT INTO test_with_merge SELECT 1; +INSERT INTO test_with_merge SELECT 2; +SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +-- These three parts will have min_age=0 at the time of merge +-- and so, nothing will be merged. 
+INSERT INTO test_with_merge SELECT 3; +SYSTEM START MERGES test_with_merge; + +SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; + +DROP TABLE test_with_merge; From 0fe0aa44d0ec318d3e9c35aa5f5af964fa28dc5e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 28 Oct 2022 07:38:57 +0000 Subject: [PATCH 17/47] Increase wait time --- .../queries/0_stateless/02473_optimize_old_parts.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql index 76c1ba73097..106175ab6f5 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sql +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sql @@ -9,7 +9,7 @@ INSERT INTO test_without_merge SELECT 1; INSERT INTO test_without_merge SELECT 2; INSERT INTO test_without_merge SELECT 3; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_without_merge' AND active; DROP TABLE test_without_merge; @@ -22,7 +22,7 @@ INSERT INTO test_with_merge SELECT 1; INSERT INTO test_with_merge SELECT 2; INSERT INTO test_with_merge SELECT 3; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; DROP TABLE test_with_merge; @@ -35,7 +35,7 @@ INSERT INTO test_with_merge SELECT 1; INSERT INTO test_with_merge SELECT 2; INSERT INTO test_with_merge SELECT 3; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; DROP TABLE test_with_merge; @@ -48,7 +48,7 @@ INSERT INTO test_replicated SELECT 1; 
INSERT INTO test_replicated SELECT 2; INSERT INTO test_replicated SELECT 3; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated; @@ -61,7 +61,7 @@ INSERT INTO test_replicated SELECT 1; INSERT INTO test_replicated SELECT 2; INSERT INTO test_replicated SELECT 3; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; SELECT count(*) FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated; @@ -74,7 +74,7 @@ SYSTEM STOP MERGES test_with_merge; -- These three parts will have min_age=6 at the time of merge INSERT INTO test_with_merge SELECT 1; INSERT INTO test_with_merge SELECT 2; -SELECT sleepEachRow(1) FROM numbers(6) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- These three parts will have min_age=0 at the time of merge -- and so, nothing will be merged. 
INSERT INTO test_with_merge SELECT 3; From e4786a611ffc8fc5426c409c4f1e8749af446a34 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 28 Oct 2022 11:45:18 +0000 Subject: [PATCH 18/47] Add long tag --- tests/queries/0_stateless/02473_optimize_old_parts.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sql b/tests/queries/0_stateless/02473_optimize_old_parts.sql index 106175ab6f5..c2bd37033c1 100644 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sql +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sql @@ -1,3 +1,5 @@ +-- Tags: long + DROP TABLE IF EXISTS test_without_merge; DROP TABLE IF EXISTS test_with_merge; DROP TABLE IF EXISTS test_replicated; From fb1623a5f84eb46a3144969a7d8c6c098c8ad377 Mon Sep 17 00:00:00 2001 From: Miel Donkers Date: Fri, 28 Oct 2022 21:04:20 +0200 Subject: [PATCH 19/47] Add SSL_CERTIFICATE auth method to all places missing --- docs/en/operations/system-tables/session_log.md | 1 + docs/en/operations/system-tables/users.md | 2 +- docs/en/sql-reference/statements/alter/user.md | 2 +- docs/en/sql-reference/statements/create/user.md | 5 +++-- src/Interpreters/SessionLog.cpp | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index 79c8ea184ce..cdf86b57ef6 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -24,6 +24,7 @@ Columns: - `DOUBLE_SHA1_PASSWORD` - `LDAP` - `KERBEROS` + - `SSL_CERTIFICATE` - `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. - `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. 
- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index eaeabab131b..6ef9b7b18a4 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -12,7 +12,7 @@ Columns: - `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. - `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. 
diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 0a68885842a..31db89164d7 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index 56a0560e57e..a756b3d4a0d 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] @@ -34,6 +34,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH double_sha1_hash BY 'hash'` - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'. @@ -54,7 +55,7 @@ Another way of specifying host is to use `@` syntax following the username. Exam - `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. - `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. -:::warning +:::warning ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. 
::: diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index 3edb84c046d..79aac63b40c 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -86,6 +86,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD), AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP), AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS), + AUTH_TYPE_NAME_AND_VALUE(AuthType::SSL_CERTIFICATE), }); #undef AUTH_TYPE_NAME_AND_VALUE static_assert(static_cast(AuthenticationType::MAX) == 7); From 97d361bce05f4dadcc5c7abdd56a677793461f49 Mon Sep 17 00:00:00 2001 From: SaltTan <20357526+SaltTan@users.noreply.github.com> Date: Tue, 1 Nov 2022 10:02:04 +0000 Subject: [PATCH 20/47] Update check-table.md --- docs/en/sql-reference/statements/check-table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index f9b428b74a1..9bcda724860 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -8,7 +8,7 @@ title: "CHECK TABLE Statement" Checks if the data in the table is corrupted. ``` sql -CHECK TABLE [db.]name +CHECK TABLE [db.]name PARTITION partition_expr ``` The `CHECK TABLE` query compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution. 
From 3afc688751a56be3e56cb2d44dd98d91d829ee48 Mon Sep 17 00:00:00 2001 From: SaltTan <20357526+SaltTan@users.noreply.github.com> Date: Tue, 1 Nov 2022 13:04:24 +0000 Subject: [PATCH 21/47] Update docs/en/sql-reference/statements/check-table.md Co-authored-by: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> --- docs/en/sql-reference/statements/check-table.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 9bcda724860..8c4b8ab90a2 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -8,7 +8,7 @@ title: "CHECK TABLE Statement" Checks if the data in the table is corrupted. ``` sql -CHECK TABLE [db.]name PARTITION partition_expr +CHECK TABLE [db.]name [PARTITION partition_expr] ``` The `CHECK TABLE` query compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution. 
From c4b717b343cee23a329cfb7905d56aa1f2407b43 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 2 Nov 2022 08:24:54 +0100 Subject: [PATCH 22/47] Update src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp Co-authored-by: Sergei Trifonov --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2b123cfbad9..fcc1b4cb3e2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -364,7 +364,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( auto best_partition_it = std::max_element( partitions_info.begin(), partitions_info.end(), - [](const auto & e1, const auto & e2) { return e1.second.min_age > e2.second.min_age; }); + [](const auto & e1, const auto & e2) { return e1.second.min_age < e2.second.min_age; }); assert(best_partition_it != partitions_info.end()); From 67707230272d0b47659769edd9a8414795115e24 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 3 Nov 2022 12:02:31 +0800 Subject: [PATCH 23/47] modify as requested --- docs/en/sql-reference/functions/string-functions.md | 2 +- src/Functions/ascii.cpp | 6 +++--- tests/queries/0_stateless/02353_ascii.reference | 2 ++ tests/queries/0_stateless/02353_ascii.sql | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 982ba05f494..8b9f25e2f98 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1155,7 +1155,7 @@ Do Nothing for 2 Minutes 2:00   Returns the ASCII code point of the first character of str. The result type is Int32. -If s is empty, the result is 0. 
If the first character is not an ASCII character or part of the Latin-1 Supplement range of UTF-16, the result is undefined. +If s is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined. diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index 38ba7f0bbac..abf9078057e 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -37,7 +37,7 @@ struct AsciiImpl [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) { - throw Exception("vectorFixedToConstant not implemented for function " + std::string(AsciiName::name), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", std::string(AsciiName::name)); } static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) @@ -52,12 +52,12 @@ struct AsciiImpl [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) { - throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(AsciiName::name)); } [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) { - throw Exception("Cannot apply function " + std::string(AsciiName::name) + " to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(AsciiName::name)); } private: diff --git a/tests/queries/0_stateless/02353_ascii.reference b/tests/queries/0_stateless/02353_ascii.reference index b85ac563483..79588517e2a 100644 --- a/tests/queries/0_stateless/02353_ascii.reference +++ 
b/tests/queries/0_stateless/02353_ascii.reference @@ -1,5 +1,7 @@ 50 +0 50 +0 48 49 50 diff --git a/tests/queries/0_stateless/02353_ascii.sql b/tests/queries/0_stateless/02353_ascii.sql index 936b0d460db..5b7a20ad61c 100644 --- a/tests/queries/0_stateless/02353_ascii.sql +++ b/tests/queries/0_stateless/02353_ascii.sql @@ -1,3 +1,5 @@ SELECT ascii('234'); +SELECT ascii(''); SELECT ascii(materialize('234')); +SELECT ascii(materialize('')); SELECT ascii(toString(number) || 'abc') from numbers(10); From a6c4204b0c884aa97039b037247ac249b2a086f1 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 3 Nov 2022 07:33:48 +0000 Subject: [PATCH 24/47] Don't iterate all cached nodes on commit --- src/Coordination/KeeperStorage.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 875dccfd705..242e7d200f8 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -377,6 +377,7 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) { assert(deltas.empty() || deltas.front().zxid >= commit_zxid); + std::unordered_set modified_nodes; while (!deltas.empty() && deltas.front().zxid == commit_zxid) { if (std::holds_alternative(deltas.front().operation)) @@ -393,7 +394,14 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) assert(path_deltas.front() == &front_delta); path_deltas.pop_front(); if (path_deltas.empty()) + { deltas_for_path.erase(front_delta.path); + modified_nodes.insert(std::move(front_delta.path)); + } + else if (path_deltas.front()->zxid > commit_zxid) + { + modified_nodes.insert(std::move(front_delta.path)); + } } else if (auto * add_auth = std::get_if(&front_delta.operation)) { @@ -409,9 +417,11 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) } // delete all cached nodes that were not modified after the commit_zxid - // the commit can end on SubDeltaEnd so we don't want 
to clear cached nodes too soon - if (deltas.empty() || deltas.front().zxid > commit_zxid) - std::erase_if(nodes, [commit_zxid](const auto & node) { return node.second.zxid == commit_zxid; }); + for (const auto & node : modified_nodes) + { + if (nodes[node].zxid == commit_zxid) + nodes.erase(node); + } } void KeeperStorage::UncommittedState::rollback(int64_t rollback_zxid) From 664690f9a92d35690baa8114f8f3083ffd85f5a6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 3 Nov 2022 07:38:43 +0000 Subject: [PATCH 25/47] Add comments --- src/Coordination/KeeperStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 242e7d200f8..3153d17899d 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -377,7 +377,9 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) { assert(deltas.empty() || deltas.front().zxid >= commit_zxid); + // collect nodes that have no further modification in the current transaction std::unordered_set modified_nodes; + while (!deltas.empty() && deltas.front().zxid == commit_zxid) { if (std::holds_alternative(deltas.front().operation)) @@ -396,10 +398,13 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) if (path_deltas.empty()) { deltas_for_path.erase(front_delta.path); + + // no more deltas for path -> no modification modified_nodes.insert(std::move(front_delta.path)); } else if (path_deltas.front()->zxid > commit_zxid) { + // next delta has a zxid from a different transaction -> no modification in this transaction modified_nodes.insert(std::move(front_delta.path)); } } @@ -417,6 +422,7 @@ void KeeperStorage::UncommittedState::commit(int64_t commit_zxid) } // delete all cached nodes that were not modified after the commit_zxid + // we only need to check the nodes that were modified in this transaction for (const auto & node : modified_nodes) { if (nodes[node].zxid == commit_zxid) From 
f39cb41d3e1f138180caf4b1f340954015a27396 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 3 Nov 2022 16:44:57 +0800 Subject: [PATCH 26/47] change as requested --- src/Functions/ascii.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index abf9078057e..cb59be55cc1 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -37,7 +37,7 @@ struct AsciiImpl [[noreturn]] static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, Int32 & /*res*/) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", std::string(AsciiName::name)); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "vectorFixedToConstant not implemented for function {}", AsciiName::name); } static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) @@ -52,12 +52,12 @@ struct AsciiImpl [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", std::string(AsciiName::name)); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to Array argument", AsciiName::name); } [[noreturn]] static void uuid(const ColumnUUID::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) { - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", std::string(AsciiName::name)); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name); } private: @@ -76,7 +76,7 @@ REGISTER_FUNCTION(Ascii) R"( Returns the ASCII code point of the first character of str. The result type is Int32. -If s is empty, the result is 0. 
If the first character is not an ASCII character or part of the Latin-1 Supplement range of UTF-16, the result is undefined) +If s is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined) )", Documentation::Examples{{"ascii", "SELECT ascii('234')"}}, Documentation::Categories{"String"} From 7186898ffaa86d909d8de838991f30d5f3a152bd Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 4 Nov 2022 00:29:17 +0800 Subject: [PATCH 27/47] Experiment --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 83e87a0e462..c62d6337dd9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5783,7 +5783,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg const auto & analysis_result = select.getAnalysisResult(); query_info.prepared_sets = select.getQueryAnalyzer()->getPreparedSets(); - query_info.prewhere_info = analysis_result.prewhere_info; + // query_info.prewhere_info = analysis_result.prewhere_info; const auto & before_where = analysis_result.before_where; const auto & where_column_name = analysis_result.where_column_name; @@ -6173,8 +6173,8 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates(); } - /// Just in case, reset prewhere info calculated from projection. - query_info.prewhere_info.reset(); + // /// Just in case, reset prewhere info calculated from projection. 
+ // query_info.prewhere_info.reset(); return *selected_candidate; } From d901ead1bcb375e45bc492d8af2925eb7f138287 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 4 Nov 2022 17:26:44 +0800 Subject: [PATCH 28/47] Done --- src/Storages/MergeTree/MergeTreeData.cpp | 7 ++++--- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataSelectExecutor.h | 1 + .../0_stateless/01710_projection_in_index.reference | 1 + .../queries/0_stateless/01710_projection_in_index.sql | 10 ++++++++++ 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c62d6337dd9..977133a8ad8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5426,6 +5426,7 @@ static void selectBestProjection( auto projection_result_ptr = reader.estimateNumMarksToRead( projection_parts, + candidate.prewhere_info, candidate.required_columns, storage_snapshot->metadata, candidate.desc->metadata, @@ -5449,6 +5450,7 @@ static void selectBestProjection( { auto normal_result_ptr = reader.estimateNumMarksToRead( normal_parts, + query_info.prewhere_info, required_columns, storage_snapshot->metadata, storage_snapshot->metadata, @@ -5783,7 +5785,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg const auto & analysis_result = select.getAnalysisResult(); query_info.prepared_sets = select.getQueryAnalyzer()->getPreparedSets(); - // query_info.prewhere_info = analysis_result.prewhere_info; const auto & before_where = analysis_result.before_where; const auto & where_column_name = analysis_result.where_column_name; @@ -6060,6 +6061,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { auto normal_result_ptr = reader.estimateNumMarksToRead( normal_parts, + query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, metadata_snapshot, @@ -6092,6 +6094,7 @@ std::optional 
MergeTreeData::getQueryProcessingStageWithAgg { query_info.merge_tree_select_result_ptr = reader.estimateNumMarksToRead( parts, + query_info.prewhere_info, analysis_result.required_columns, metadata_snapshot, metadata_snapshot, @@ -6173,8 +6176,6 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates(); } - // /// Just in case, reset prewhere info calculated from projection. - // query_info.prewhere_info.reset(); return *selected_candidate; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index afdd98b8e41..674e02b16ec 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1294,6 +1294,7 @@ static void selectColumnNames( MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + const PrewhereInfoPtr & prewhere_info, const Names & column_names_to_return, const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, @@ -1318,7 +1319,7 @@ MergeTreeDataSelectAnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar return ReadFromMergeTree::selectRangesToRead( std::move(parts), - query_info.prewhere_info, + prewhere_info, added_filter_nodes, metadata_snapshot_base, metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 541f6446674..e302663597d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -56,6 +56,7 @@ public: /// This method is used to select best projection for table. 
MergeTreeDataSelectAnalysisResultPtr estimateNumMarksToRead( MergeTreeData::DataPartsVector parts, + const PrewhereInfoPtr & prewhere_info, const Names & column_names, const StorageMetadataPtr & metadata_snapshot_base, const StorageMetadataPtr & metadata_snapshot, diff --git a/tests/queries/0_stateless/01710_projection_in_index.reference b/tests/queries/0_stateless/01710_projection_in_index.reference index 73c1df53be4..4be49ff0513 100644 --- a/tests/queries/0_stateless/01710_projection_in_index.reference +++ b/tests/queries/0_stateless/01710_projection_in_index.reference @@ -1,2 +1,3 @@ 1 1 1 2 2 2 +1 diff --git a/tests/queries/0_stateless/01710_projection_in_index.sql b/tests/queries/0_stateless/01710_projection_in_index.sql index 2669d69dc9f..87f5e79e37e 100644 --- a/tests/queries/0_stateless/01710_projection_in_index.sql +++ b/tests/queries/0_stateless/01710_projection_in_index.sql @@ -9,3 +9,13 @@ set allow_experimental_projection_optimization = 1, max_rows_to_read = 3; select * from t where i < 5 and j in (1, 2); drop table t; + +drop table if exists test; + +create table test (name String, time Int64) engine MergeTree order by time; + +insert into test values ('hello world', 1662336000241); + +select count() from (select fromUnixTimestamp64Milli(time, 'UTC') time_fmt, name from test where time_fmt > '2022-09-05 00:00:00'); + +drop table test; From c9b4bc66b564800f8c44ab920f91a85fd4d390dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 4 Nov 2022 12:19:30 +0100 Subject: [PATCH 29/47] Fiux --- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 10 ++++++++++ tests/integration/test_storage_postgresql/test.py | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 8e89765b635..2a5f7a90beb 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -443,6 +443,16 @@ ASTPtr 
DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c if (which.isArray()) return makeASTFunction("Array", getColumnDeclaration(typeid_cast(data_type.get())->getNestedType())); + if (which.isDateTime64()) + { + auto ast_expression = std::make_shared(); + + ast_expression->name = "DateTime64"; + ast_expression->arguments = std::make_shared(); + ast_expression->arguments->children.emplace_back(std::make_shared(static_cast(6))); + return ast_expression; + } + return std::make_shared(data_type->getName()); } diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index a3ebbe97451..7cc350e0be2 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -693,6 +693,19 @@ def test_auto_close_connection(started_cluster): assert count == 2 +def test_datetime(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("drop table if exists test") + cursor.execute("create table test (u timestamp)") + + node1.query("drop database if exists pg") + node1.query("create database pg engine = PostgreSQL(postgres1)") + assert "DateTime64(6)" in node1.query("show create table pg.test") + node1.query("detach table pg.test") + node1.query("attach table pg.test") + assert "DateTime64(6)" in node1.query("show create table pg.test") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 6ada8e9e39223c84668ca18aa45c0661bc903cbc Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 4 Nov 2022 13:37:10 +0100 Subject: [PATCH 30/47] Update src/Databases/PostgreSQL/DatabasePostgreSQL.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git 
a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 2a5f7a90beb..79133d2e2fd 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -445,12 +445,7 @@ ASTPtr DatabasePostgreSQL::getColumnDeclaration(const DataTypePtr & data_type) c if (which.isDateTime64()) { - auto ast_expression = std::make_shared(); - - ast_expression->name = "DateTime64"; - ast_expression->arguments = std::make_shared(); - ast_expression->arguments->children.emplace_back(std::make_shared(static_cast(6))); - return ast_expression; + return makeASTFunction("DateTime64", std::make_shared(static_cast(6))); } return std::make_shared(data_type->getName()); From a89e8475145d5e4197079d319bf3f274aaa830d0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 1 Nov 2022 22:33:52 +0800 Subject: [PATCH 31/47] Fix getauxval for sanitizer builds --- base/glibc-compatibility/musl/getauxval.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index 22886013d07..aaaca0465ea 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -8,6 +8,8 @@ #include // ElfW #include +#include "syscall.h" + #define ARRAY_SIZE(a) sizeof((a))/sizeof((a[0])) /// Suppress TSan since it is possible for this code to be called from multiple threads, @@ -39,7 +41,9 @@ ssize_t __retry_read(int fd, void * buf, size_t count) { for (;;) { - ssize_t ret = read(fd, buf, count); + // We cannot use the read syscall as it will be intercept by sanitizers, which aren't + // initialized yet. Emit syscall directly. 
+ ssize_t ret = __syscall_ret(__syscall(SYS_read, fd, buf, count)); if (ret == -1) { if (errno == EINTR) From 4c2b3de93d1b0b2df78c9647a94c45d7bffb557e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 4 Nov 2022 20:21:50 +0800 Subject: [PATCH 32/47] Fix msan error --- base/glibc-compatibility/musl/getauxval.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/base/glibc-compatibility/musl/getauxval.c b/base/glibc-compatibility/musl/getauxval.c index aaaca0465ea..eba12604b4d 100644 --- a/base/glibc-compatibility/musl/getauxval.c +++ b/base/glibc-compatibility/musl/getauxval.c @@ -94,6 +94,11 @@ static unsigned long NO_SANITIZE_THREAD __auxv_init_procfs(unsigned long type) _Static_assert(sizeof(aux) < 4096, "Unexpected sizeof(aux)"); while (__retry_read(fd, &aux, sizeof(aux)) == sizeof(aux)) { +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + __msan_unpoison(&aux, sizeof(aux)); +#endif +#endif if (aux.a_type == AT_NULL) { break; From 5cb69d8a22a9e4b70fc574eaea416041483e48e5 Mon Sep 17 00:00:00 2001 From: Aleksandr Musorin Date: Wed, 2 Nov 2022 11:04:58 +0100 Subject: [PATCH 33/47] changed type name for S3_Plain storage renamed a disk for S3PlainObjectStorage in system.disks table from s3 to s3_plain --- src/Disks/DiskType.h | 3 +++ src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 4 +++- .../test_backup_restore_s3/test.py | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 037b65f8e07..4d099e33a7a 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -11,6 +11,7 @@ enum class DataSourceType Local, RAM, S3, + S3_Plain, HDFS, WebServer, AzureBlobStorage, @@ -26,6 +27,8 @@ inline String toString(DataSourceType data_source_type) return "memory"; case DataSourceType::S3: return "s3"; + case DataSourceType::S3_Plain: + return "s3_plain"; case DataSourceType::HDFS: return "hdfs"; case DataSourceType::WebServer: diff --git 
a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 6b1e8289b15..56f1c895924 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -213,7 +213,9 @@ public: template S3PlainObjectStorage(Args && ...args) : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) - {} + { + data_source_description.type = DataSourceType::S3_Plain; + } }; } diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 7ddb1459ab9..ee9c458c044 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -50,6 +50,24 @@ def check_backup_and_restore(storage_policy, backup_destination, size=1000): ) +def check_system_tables(): + disks = [ + tuple(disk.split("\t")) + for disk in node.query("SELECT name, type FROM system.disks").split("\n") + if disk + ] + expected_disks = ( + ("default", "local"), + ("disk_s3", "s3"), + ("disk_s3_other_bucket", "s3"), + ("disk_s3_plain", "s3_plain"), + ) + assert len(expected_disks) == len(disks) + for expected_disk in expected_disks: + if expected_disk not in disks: + raise AssertionError(f"Missed {expected_disk} in {disks}") + + @pytest.mark.parametrize( "storage_policy, to_disk", [ @@ -93,6 +111,7 @@ def test_backup_to_s3(): f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) check_backup_and_restore(storage_policy, backup_destination) + check_system_tables() def test_backup_to_s3_named_collection(): From c98731a19b2e7af00480a768175ce111a9cceaa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Nov 2022 19:22:04 +0100 Subject: [PATCH 34/47] Remove some utils --- utils/CMakeLists.txt | 4 - utils/db-generator/CMakeLists.txt | 2 - utils/db-generator/README.md | 35 - utils/db-generator/query_db_generator.cpp | 1354 ----------------- utils/iotest/CMakeLists.txt | 9 - utils/iotest/iotest.cpp 
| 197 --- utils/iotest/iotest_aio.cpp | 203 --- utils/iotest/iotest_nonblock.cpp | 177 --- .../CMakeLists.txt | 3 - .../main.cpp | 286 ---- .../CMakeLists.txt | 2 - .../main.cpp | 47 - 12 files changed, 2319 deletions(-) delete mode 100644 utils/db-generator/CMakeLists.txt delete mode 100644 utils/db-generator/README.md delete mode 100644 utils/db-generator/query_db_generator.cpp delete mode 100644 utils/iotest/CMakeLists.txt delete mode 100644 utils/iotest/iotest.cpp delete mode 100644 utils/iotest/iotest_aio.cpp delete mode 100644 utils/iotest/iotest_nonblock.cpp delete mode 100644 utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt delete mode 100644 utils/zookeeper-adjust-block-numbers-to-parts/main.cpp delete mode 100644 utils/zookeeper-create-entry-to-download-part/CMakeLists.txt delete mode 100644 utils/zookeeper-create-entry-to-download-part/main.cpp diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 92a97a9c60e..70c32c67063 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -20,17 +20,13 @@ add_subdirectory (report) # Not used in package if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (compressor) - add_subdirectory (iotest) add_subdirectory (corrector_utf8) add_subdirectory (zookeeper-cli) add_subdirectory (zookeeper-dump-tree) add_subdirectory (zookeeper-remove-by-list) - add_subdirectory (zookeeper-create-entry-to-download-part) - add_subdirectory (zookeeper-adjust-block-numbers-to-parts) add_subdirectory (wikistat-loader) add_subdirectory (check-marks) add_subdirectory (checksum-for-compressed-block) - add_subdirectory (db-generator) add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) diff --git a/utils/db-generator/CMakeLists.txt b/utils/db-generator/CMakeLists.txt deleted file mode 100644 index 45780717752..00000000000 --- a/utils/db-generator/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (query_db_generator query_db_generator.cpp) 
-target_link_libraries(query_db_generator PRIVATE clickhouse_parsers boost::program_options) diff --git a/utils/db-generator/README.md b/utils/db-generator/README.md deleted file mode 100644 index 5596aac66e4..00000000000 --- a/utils/db-generator/README.md +++ /dev/null @@ -1,35 +0,0 @@ -# Clickhouse query analysis - -Here we will consider only `SELECT` queries, i.e. those queries that get data from the table. -The built-in Clickhouse parser accepts a string as input, which is a query. Among 14 main clauses of `SELECT` statement: `WITH`, `SELECT`, `TABLES`, `PREWHERE`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY`, `LIMIT_BY_OFFSET`, `LIMIT_BY_LENGTH`, `LIMIT_BY`, `LIMIT_OFFSET`, `LIMIT_LENGTH`, `SETTINGS`, we will analyze the `SELECT`, `TABLES`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY` clauses because the most of data is there. We need this data to analyze the structure and to identify values. The parser issues a tree structure after parsing a query, where each node is a specific query execution operation, a function over values, a constant, a designation, etc. Nodes also have subtrees where their arguments or suboperations are located. We will try to reveal the data we need by avoiding this tree. - -## Scheme analysis - -It is necessary to determine possible tables by a query. Having a query string, you can understand which parts of it represent the names of the tables, so you can determine their number in our database. -In the Clickhouse parser, `TABLES` (Figure 1) is a query subtree responsible for tables where we get data. It contains the main table where the columns come from, as well as the `JOIN` operations that are performed in the query. Avoiding all nodes in the subtree, we use the names of the tables and databases where they are located, as well as their alias, i.e. the shortened names chosen by the query author. We may need these names to determine the ownership of the column in the future. 
-Thus, we get a set of databases for the query, as well as tables and their aliases, with the help of them a query is made. - -Then we need to define the set of columns that are in the query and the tables they can refer to. The set of columns in each table is already known during the query execution. Therefore, the program automatically links the column and table at runtime. However, in our case, it is impossible to unambiguously interpret the belonging of a column to a specific table, for example, in the following query `SELECT column1, column2, column3 FROM table1 JOIN table2 on table1.column2 = table2.column3`. In this case, we can say which table `column2` and `column3` belong to. However, `column1` can belong to either the first or the second table. We will refer undefined columns to the main table, on which a query is made, for unambiguous interpretation of such cases. For example, in this case, it will be `table1`. -All columns in the tree are in `IDENTIFIER` type nodes, which are in the `SELECT`, `TABLES`, `WHERE`, `GROUP_BY`, `HAVING`, `ORDER_BY` subtrees. We form a set of all tables recursively avoiding the subtrees, then we split the column into constituents such as the table (if it is explicitly specified with a dot) and the name. Then, since the table can be an alias, we replace the alias with the original table name. We now have a list of all the columns and tables they belong to. We define the main query table for non-table columns. - -## Column analysis - -Then we need to exactly define data types for columns that have a value in the query. An example is the boolean `WHERE` clause where we test boolean expressions in its attributes. If the query specifies `column > 5`, then we can conclude that this column contains a numeric value, or if the `LIKE` expression is applied to the attribute, then the attribute has a string type. 
-In this part, you need to learn how to extract such expressions from a query and match data types for columns, where it is possible. At the same time, it is clear that it is not always possible to make an unambiguous decision about the type of a particular attribute from the available values. For example, `column > 5` can mean many numeric types such as `UINT8`, `UINT32`, `INT32`, `INT64`, etc. It is necessary to determine the interpretation of certain values since searching through all possible values ​​can be quite large and long. -It can take a long time to iterate over all possible values, so we use `INT64` and `FLOAT64` types for numeric values, `STRING` for strings, `DATE` and `DATETIME` for dates, and `ARRAY`. -We can determine column values ​​using boolean, arithmetic and other functions on the column values ​​that are specified in the query. Such functions are in the `SELECT` and `WHERE` subtrees. The function parameter can be a constant, a column or another function (Figure 2). Thus, the following parameters can help to understand the type of the column: -- The types of arguments that a function can take, for example, the `TOSTARTOFMINUTE` function (truncate time up to a multiple of 5 minutes down) can only accept `DATETIME`, so if the argument of this function is a column, then this column has `DATETIME` type. -- The types of the remaining arguments in this function. For example, the `EQUALS` function means equality of its argument types, so if a constant and a column are present in this function, then we can define the type of the column as the type of the constant. - -Thus, we define the possible argument types, the return type, the parameter for each function, and the function arguments of the identical type. The recursive function handler will determine the possible types of columns used in these functions by the values of the arguments, and then return the possible types of the function's result. 
-Now, for each column, we have many possible types of values. We will choose one specific type from this set to interpret the query unambiguously. - -## Column values definition - -At this stage, we already have a certain structure of the database tables, we need to fill this table with values. We should understand which columns depend on each other when executing the function (for example, the join is done according to two columns, which means that they must have the same values). We also need to understand what values ​​the columns must have to fulfill various conditions during execution. -We search for all comparison operations in our query to achieve the goal. If the arguments of the operation are two columns, then we consider them linked. If the arguments are the column and the value, then we assign that value to the possible column value and add the value with some noise. A random number is a noise for a numeric type, it is a random number of days for a date, etc. In this case, a handler for this operation is required for each comparison operation, which generates at least two values, one of them is the operation condition, and the other is not. For example, a value greater than 5 and less than or equal to 5 must be assigned for the operation `column1 > 5`, `column1`, for the operation `column2 LIKE some% string` the same is true. The satisfying and not satisfying expression must be assigned to `column2`. -Now we have many associated columns and many values. We know that the connectivity of columns is symmetric, but we need to add transitivity for a complete definition, because if `column1 = column2` and `column2 = column3`, then `column1 = column3`, but this does not follow from the construction. Accordingly, we need to extend the connectivity across all columns. We combine multiple values for each column with the values associated with it. If we have columns with no values, then we generate random values. 
- -## Generation - -We have a complete view of the database schema as well as many values ​​for each table now. We will generate data by cartesian product of the value set of each column for a specific table. Thus, we get a set for each table, consisting of sets of values for each column. We start generating queries that create this table and fill it with data. We generate the `CREATE QUERY` that creates this table based on the structure of the table and the types of its columns, and then we generate the `INSERT QUERY` over the set of values, which fills the table with data. diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp deleted file mode 100644 index 00785af89f7..00000000000 --- a/utils/db-generator/query_db_generator.cpp +++ /dev/null @@ -1,1354 +0,0 @@ -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace po = boost::program_options; - -using ColumnType = uint32_t; -using TableAndColumn = std::pair; -pcg64 rng; - -std::string randomString(size_t length) -{ - auto randchar = []() -> char - { - const char charset[] = "0123456789" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"; - const size_t max_index = (sizeof(charset) - 1); - return charset[rng() % max_index]; - }; - std::string str(length, 0); - std::generate_n(str.begin(), length, randchar); - return str; -} -std::string randomInteger(unsigned int min = 0, unsigned int max = 4294967295) -{ - int r = rng() % (max - min) + min; - return std::to_string(r); -} - -std::string randomFloat(unsigned int min = 0, unsigned int max = 4294967295) -{ - float r = static_cast(rng() % max) / (static_cast(rng() % 100)) + min; - return std::to_string(r); -} - -std::string randomDate() -{ - int32_t year = rng() % 136 + 1970; - int32_t month = rng() % 12 + 1; - int32_t day = rng() % 12 + 1; - 
char answer[13]; - size_t size = sprintf(answer, "'%04u-%02u-%02u'", year, month, day); - return std::string(answer, size); -} - -std::string randomDatetime() -{ - int32_t year = rng() % 136 + 1970; - int32_t month = rng() % 12 + 1; - int32_t day = rng() % 12 + 1; - int32_t hours = rng() % 24; - int32_t minutes = rng() % 60; - int32_t seconds = rng() % 60; - char answer[22]; - size_t size = sprintf( - answer, - "'%04u-%02u-%02u %02u:%02u:%02u'", - year, - month, - day, - hours, - minutes, - seconds); - return std::string(answer, size); -} -TableAndColumn get_table_a_column(const std::string & c) -{ - auto point_place = c.rfind('.'); - std::string db{}; - std::string column{}; - if (point_place != std::string::npos) - { - db = c.substr(0, point_place); - column = c.substr(point_place + 1); - } - else - { - column = c; - } - return { db, column }; -} - - -enum Type : ColumnType -{ - i = 1, - // int - f = 2, - // float - s = 4, - // string - d = 8, - // date - dt = 16, - // datetime - b = 32, - // bool - all = 63, - a = 64, - // array - t = 128, - // tuple -}; - - -std::map type_definition = -{ - {Type::i, "Int64"}, {Type::f, "Float64"}, {Type::s, "String"}, {Type::d, "Date"}, {Type::dt, "DateTime"}, {Type::b, "UInt8"} -}; - -ColumnType time_type(std::string value) -{ - if (value.length() == 12) - { - for (size_t i : {5, 8}) - { - if (value[i] != '-') - return Type::s; - } - for (size_t i : {1, 2, 3, 4, 6, 7, 9, 10}) - { - if (!isdigit(value[i])) - return Type::s; - } - return Type::d; - } - - if (value.length() == 21) - { - for (size_t i : {5, 8}) - { - if (value[i] != '-') - return Type::s; - } - for (size_t i : {14, 17}) - { - if (value[i] != '-') - return Type::s; - } - if (value[11] != '-') - return Type::s; - return Type::dt; - } - return Type::s; -} -// Casting inner clickhouse parser type to our type -ColumnType type_cast(int t) -{ - switch (t) - { - case 1: - case 2: - case 4: - case 5: - case 19: - case 20: - case 21: - return Type::i; - - case 3: - return 
Type::f; - - case 16: - return Type::s; - - case 17: - return Type::a | Type::all; - - case 18: - return Type::t | Type::all; - } - return Type::all; -} - - -class FuncRet -{ -public: - FuncRet() = default; - - FuncRet(ColumnType t, std::string v) - : value(v) - , type(t) {} - - FuncRet(ColumnType t, std::string v, bool is_a) - : value(v) - , type(t) - , is_array(is_a) {} - - std::string value{}; - ColumnType type = Type::all; - bool is_array = false; -}; - - -std::map func_to_return_type = { - {"divide", FuncRet(Type::f, "")}, {"e", FuncRet(Type::f, "e()")}, {"pi", FuncRet(Type::f, "pi()")}, {"exp", FuncRet(Type::f, "")}, - {"log", FuncRet(Type::f,"")}, {"exp2", FuncRet(Type::f, "")}, {"log2", FuncRet(Type::f, "")}, {"exp10", FuncRet(Type::f, "")}, - {"log10", FuncRet(Type::f, "")}, {"sqrt", FuncRet(Type::f, "")}, {"cbrt", FuncRet(Type::f, "")}, {"erf", FuncRet(Type::f, "")}, - {"erfc", FuncRet(Type::f, "")}, {"lgamma", FuncRet(Type::f, "")}, {"tgamma", FuncRet(Type::f, "")}, {"sin", FuncRet(Type::f, "")}, - {"cos", FuncRet(Type::f, "")}, {"tan", FuncRet(Type::f, "")}, {"asin", FuncRet(Type::f, "")}, {"acos", FuncRet(Type::f, "")}, - {"atan", FuncRet(Type::f, "")}, {"pow", FuncRet(Type::f, "")}, {"splitbystring", FuncRet(Type::s | Type::a,"")}, - {"splitbychar", FuncRet(Type::s | Type::a, "")}, {"alphatokens", FuncRet(Type::s | Type::a, "")}, {"toyear", FuncRet(Type::i, "")}, - {"tomonth", FuncRet(Type::i, "")}, {"todayofmonth", FuncRet(Type::i, "")}, {"tohour", FuncRet(Type::dt, "")}, {"tominute", FuncRet(Type::dt, "")}, - {"toseconds", FuncRet(Type::dt, "")}, {"tounixtimestamp", FuncRet(Type::i, "")}, {"tostartofyear", FuncRet(Type::dt | Type::d, "")}, - {"tostartofquater",FuncRet(Type::dt | Type::d, "")}, {"tostartofmonth", FuncRet(Type::dt | Type::d, "")}, {"tomonday", FuncRet(Type::dt | Type::d, "")}, - {"tostartoffiveminutes", FuncRet(Type::dt, "")}, {"tostartoftenminutes", FuncRet(Type::dt, "")}, {"tostartoffifteenminutes", FuncRet(Type::dt, "")}, - 
{"tostartofinterval", FuncRet(Type::dt, "")}, {"totime", FuncRet(Type::dt, "")}, {"torelativemonthnum", FuncRet(Type::i, "")}, - {"torelativeweeknum", FuncRet(Type::i, "")}, {"torelativedaynum", FuncRet(Type::i, "")}, {"torelativehournum", FuncRet(Type::i, "")}, - {"torelativeminutenum", FuncRet(Type::i, "")}, {"torelativesecondsnum", FuncRet(Type::i, "")}, {"datediff", FuncRet(Type::d | Type::dt, "")}, - {"formatdatetime", FuncRet(Type::s, "")}, {"now", FuncRet(Type::dt | Type::d, "now()")}, {"today", FuncRet(Type::d | Type::dt, "today()")}, - {"yesterday", FuncRet(Type::d | Type::dt, "yesterday()")}, {"tolastdayofmonth", FuncRet(Type::dt | Type::d, "")} -}; - -std::set func_args_same_types = { - "equals", "notequals", "less", "greater", "lessorequals", "greaterorequals", "multiply" -}; - -std::map func_to_param_type = { - {"tostartofminute", Type::dt}, {"plus", Type::i | Type::f | Type::d | Type::dt}, {"multiply", Type::i | Type::f}, - {"minus", Type::i | Type::f | Type::d | Type::dt}, {"negate", Type::i | Type::f}, {"divide", Type::i | Type::f}, - {"abs", Type::i | Type::f}, {"gcd", Type::i | Type::f}, {"lcm", Type::i | Type::f}, {"bitnot", Type::i}, {"bitshiftleft", Type::i}, - {"bitshiftright", Type::i}, {"bittest", Type::i}, {"exp", Type::i | Type::f}, {"log", Type::i | Type::f}, - {"exp2", Type::i | Type::f}, {"log2", Type::i | Type::f}, {"exp10", Type::i | Type::f}, {"log10", Type::i | Type::f}, - {"sqrt", Type::i | Type::f}, {"cbrt", Type::i | Type::f}, {"erf", Type::i | Type::f}, {"erfc", Type::i | Type::f}, - {"lgamma", Type::i | Type::f}, {"tgamma", Type::i | Type::f}, {"sin", Type::i | Type::f}, {"cos", Type::i | Type::f}, - {"tan", Type::i | Type::f}, {"asin", Type::i | Type::f}, {"acos", Type::i | Type::f}, {"atan", Type::i | Type::f}, - {"pow", Type::i | Type::f}, {"arrayjoin", Type::all | Type::a}, {"substring", Type::s}, {"splitbystring", Type::s}, {"splitbychar", Type::s}, - {"alphatokens", Type::s}, {"toyear", Type::d | Type::dt}, {"tomonth", 
Type::d | Type::dt}, {"todayofmonth", Type::d | Type::dt}, {"tohour", Type::dt}, - {"tominute", Type::dt}, {"tosecond", Type::dt}, {"touixtimestamp", Type::dt}, {"tostartofyear", Type::d | Type::dt}, - {"tostartofquarter", Type::d | Type::dt}, {"tostartofmonth", Type::d | Type::dt}, {"tomonday", Type::d | Type::dt}, - {"tostartoffiveminutes", Type::dt}, {"tostartoftenminutes", Type::dt}, {"tostartoffifteenminutes", Type::d | Type::dt}, - {"tostartofinterval", Type::d | Type::dt}, {"totime", Type::d | Type::dt}, {"torelativehonthnum", Type::d | Type::dt}, - {"torelativeweeknum", Type::d | Type::dt}, {"torelativedaynum", Type::d | Type::dt}, {"torelativehournum", Type::d | Type::dt}, - {"torelativeminutenum", Type::d | Type::dt}, {"torelativesecondnum", Type::d | Type::dt}, {"datediff", Type::d | Type::dt}, - {"formatdatetime", Type::dt}, {"tolastdayofmonth", Type::d | Type::dt} -}; - - -class Column -{ -public: - TableAndColumn name; - std::set equals; - std::set values; - ColumnType type = Type::all; - bool is_array = false; - - Column() = default; - - explicit Column(const std::string & column_name) - { - name = std::make_pair("", column_name); - type = Type::all; - } - - void merge(Column other) - { - if (name.second.empty()) - name = other.name; - equals.insert(other.equals.begin(), other.equals.end()); - values.insert(other.values.begin(), other.values.end()); - type &= other.type; - is_array |= other.is_array; - } - - void printType() const - { - if (type & Type::i) - std::cout << "I"; - if (type & Type::f) - std::cout << "F"; - if (type & Type::s) - std::cout << "S"; - if (type & Type::d) - std::cout << "D"; - if (type & Type::dt) - std::cout << "DT"; - if (is_array) - std::cout << "ARR"; - std::cout << "\n"; - } - - void print() - { - std::cout << name.first << "." 
<< name.second << "\n"; - std::cout << "type: "; - printType(); - std::cout << "values:"; - for (const auto & val : values) - std::cout << " " << val; - std::cout << "\n"; - std::cout << "equal:"; - for (const auto & col : equals) - std::cout << " " << col.first << "." << col.second; - std::cout << "\n"; - } - - std::string generateOneValue() const - { - if (type & Type::i) - return randomInteger(); - - if (type & Type::f) - return randomFloat(); - - if (type & Type::d) - return randomDate(); - - if (type & Type::dt) - return randomDatetime(); - - if (type & Type::s) - return "'" + randomString(rng() % 40) + "'"; - - if (type & Type::b) - return "0"; - - return ""; - } - - bool generateValues(int amount = 0) - { - if (values.size() > 2 && amount == 0) - return false; - while (values.empty() or amount > 0) - { - amount -= 1; - if (is_array) - { - std::string v = "["; - for (unsigned int i = 0; i < static_cast(rng()) % 10 + 1; ++i) - { - if (i != 0) - v += ", "; - v += generateOneValue(); - } - v += "]"; - values.insert(v); - } - else - { - values.insert(generateOneValue()); - } - } - return true; - } - - void unifyType() - { - if (type & Type::i) - type = Type::i; - else if (type & Type::f) - type = Type::f; - else if (type & Type::d) - type = Type::d; - else if (type & Type::dt) - type = Type::dt; - else if (type & Type::s) - type = Type::s; - else if (type & Type::b) - type = Type::b; - else - throw std::runtime_error("Error in determination column type " + name.first + '.' 
+ name.second); - } -}; - - -std::set> -decartMul( - std::set> & prev, - std::set & mul) -{ - std::set> result; - for (const auto & v : prev) - { - for (const auto & m : mul) - { - std::vector tmp = v; - tmp.push_back(m); - result.insert(tmp); - } - } - return result; -} - - -class Table -{ -public: - Table() = default; - - explicit Table(std::string table_name) - : name(table_name) {} - - std::string name; - std::set columns; - std::map column_description; - - bool columnExists(const std::string & column_name) const - { - return columns.contains(column_name); // || columns_maybe.contains(column_name); - } - - void addColumn(const std::string & column_name) - { - columns.insert(column_name); - } - - void setDescription(Column other) - { - column_description[other.name.second].merge(other); - } - - void print() - { - std::cout << "Table\n"; - std::cout << name << "\n"; - std::cout << "Columns:\n\n"; - for (const auto & column : columns) - { - std::cout << column << "\n"; - if (column_description.contains(column)) - column_description[column].print(); - std::cout << "\n"; - } - std::cout << "\n"; - } - - void merge(Table other) - { - name = other.name; - columns.insert(other.columns.begin(), other.columns.end()); - for (const auto & desc : other.column_description) - column_description[desc.first].merge(desc.second); - } - - std::string createQuery() - { - std::string create; - std::string db, _; - std::tie(db, _) = get_table_a_column(name); - create = "CREATE DATABASE IF NOT EXISTS " + db + ";\n\n"; - create += "CREATE TABLE IF NOT EXISTS " + name + " (\n"; - for (auto column = columns.begin(); column != columns.end(); ++column) - { - if (column != columns.begin()) - create += ", \n"; - create += *column + " "; - create += column_description[*column].is_array ? "Array(" : ""; - create += type_definition[column_description[*column].type]; - create += column_description[*column].is_array ? 
")" : ""; - } - create += "\n) ENGINE = Log;\n\n"; - return create; - } - - std::string insertQuery() - { - std::string insert = "INSERT INTO " + name + "\n"; - insert += "("; - std::set> values = {std::vector(0)}; - for (auto column = columns.begin(); column != columns.end(); ++column) - { - if (column != columns.begin()) - insert += ", "; - insert += *column; - values = decartMul(values, column_description[*column].values); - } - insert += ") VALUES \n"; - for (auto val_set_iter = values.begin(); val_set_iter != values.end(); - ++val_set_iter) - { - if (val_set_iter != values.begin()) - insert += ",\n"; - auto val_set = *val_set_iter; - insert += "("; - for (auto val = val_set.begin(); val != val_set.end(); ++val) - { - if (val != val_set.begin()) - insert += ", "; - insert += *val; - } - insert += ")"; - } - insert += ";\n\n"; - return insert; - } -}; - - -class TableList -{ -public: - std::string main_table; - std::map aliases; - std::unordered_map tables; - std::set nested; - - bool tableExists(const std::string & table_name) const - { - return tables.contains(table_name); - } - - void addColumn(std::string full_column) - { - std::string table, column; - std::tie(table, column) = get_table_a_column(full_column); - if (!table.empty()) - { - if (tables.contains(table)) - { - tables[table].addColumn(column); - return; - } - if (aliases.contains(table)) - { - tables[aliases[table]].addColumn(column); - return; - } - nested.insert(table); - } - tables[main_table].addColumn(full_column); - } - - void addTable(std::string table_name) - { - if (tables.contains(table_name)) - return; - - tables[table_name] = Table(table_name); - if (main_table.empty()) - main_table = table_name; - } - - void addDescription(const Column & description) - { - std::string table = description.name.first; - if (tables.contains(table)) - tables[table].setDescription(description); - } - - TableAndColumn getTable(std::string full_column) const - { - std::string table, column; - std::tie(table, 
column) = get_table_a_column(full_column); - if (!table.empty()) - { - if (tables.contains(table)) - return std::make_pair(table, column); - - if (aliases.contains(table)) - { - table = aliases.find(table)->second; - return std::make_pair(table, column); - } - } - return std::make_pair(main_table, full_column); - } - - void print() - { - for (auto & table : tables) - { - table.second.print(); - std::cout << "\n"; - } - } - - void merge(TableList other) - { - for (const auto & table : other.tables) - tables[table.first].merge(table.second); - nested.insert(other.nested.begin(), other.nested.end()); - if (main_table.empty()) - main_table = other.main_table; - } -}; - -std::string getAlias(DB::ASTPtr ch) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - return x->alias; - - for (const auto & child : (*ch).children) - { - auto alias = getAlias(child); - if (!alias.empty()) - return alias; - } - return ""; -} - -using FuncHandler = std::function &)>; -std::map handlers = {}; - -FuncRet arrayJoinFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - indents.insert(ident->name()); - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = Type::all; - c.is_array = true; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::all, ""); - return r; - } - return FuncRet(); -} - -FuncRet inFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents{}; - std::set values{}; - ColumnType type_value = Type::all; - - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - ColumnType type = 
type_cast(literal->value.getType()); - - auto routine = [&](const auto & arr_values) - { - for (auto & val : arr_values) - { - type = type_cast(val.getType()); - if (type == Type::s || type == Type::d || type == Type::dt) - type = time_type(applyVisitor(DB::FieldVisitorToString(), val)); - type_value &= type; - values.insert(applyVisitor(DB::FieldVisitorToString(), val)); - } - }; - - if (type & Type::a) - { - auto arr_values = literal->value.get(); - routine(arr_values); - } - - if (type & Type::a) - { - auto arr_values = literal->value.get(); - routine(arr_values); - } - } - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - if (!ret.value.empty()) - { - values.insert(ret.value); - } - type_value &= ret.type; - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.values.insert(values.begin(), values.end()); - c.generateValues(1); - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::b | Type::i, ""); - return r; - } - return FuncRet(); -} - -FuncRet arrayFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::string value = "["; - ColumnType type_value = Type::i | Type::f | Type::d | Type::dt | Type::s; - bool no_indent = true; - for (const auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - ColumnType type = type_cast(literal->value.getType()); - if (type == Type::s || type == Type::d || type == Type::dt) - type = time_type(value); - type_value &= type; - - if (value != "[") - value += 
", "; - value += applyVisitor(DB::FieldVisitorToString(), literal->value); - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - value += ']'; - FuncRet r(type_value, ""); - r.is_array = true; - if (no_indent) - r.value = value; - return r; - } - return FuncRet(); -} -FuncRet arithmeticFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::i | Type::f | Type::d | Type::dt; - ColumnType args_types = 0; - bool no_indent = true; - for (auto & arg : x->arguments->children) - { - ColumnType type = 0; - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - type = type_cast(literal->value.getType()); - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - type = ret.type; - } - args_types |= type; - } - if (args_types & (Type::d | Type::dt)) - type_value -= Type::f; - if (args_types & Type::f) - type_value -= Type::d | Type::dt; - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - ColumnType ret_type = 0; - if (args_types & Type::dt) - ret_type = Type::dt; - else if (args_types & Type::d) - ret_type = Type::d | Type::dt; - else if (args_types & Type::f) - ret_type = Type::f; - else - ret_type = Type::d | Type::f | Type::dt | Type::i; - FuncRet r(ret_type, ""); - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - 
formatAST(*ch, buf); - r.value = buf.str(); - } - return r; - } - return FuncRet(); -} -FuncRet likeFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::s; - for (auto & arg : x->arguments->children) - { - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - indents.insert(ident->name()); - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - std::string value = applyVisitor(DB::FieldVisitorToString(), literal->value); - std::string example{}; - for (size_t i = 0; i != value.size(); ++i) /// NOLINT - { - if (value[i] == '%') - example += randomString(rng() % 10); - else if (value[i] == '_') - example += randomString(1); - else - example += value[i]; - } - values.insert(example); - } - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.values.insert(values.begin(), values.end()); - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - FuncRet r(Type::b, ""); - return r; - } - return FuncRet(); -} - -FuncRet simpleFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - std::set indents = {}; - std::set values = {}; - ColumnType type_value = Type::all; - bool is_array = false; - bool no_indent = true; - if (func_to_param_type.contains(boost::algorithm::to_lower_copy(x->name))) - { - type_value &= func_to_param_type[boost::algorithm::to_lower_copy(x->name)]; - is_array = func_to_param_type[boost::algorithm::to_lower_copy(x->name)] & Type::a; - } - for (const auto & arg : x->arguments->children) - { - ColumnType type = Type::all; - std::string value; - auto ident = std::dynamic_pointer_cast(arg); - if (ident) - { - no_indent = false; - indents.insert(ident->name()); - } - auto literal = std::dynamic_pointer_cast(arg); - if (literal) - { - value = applyVisitor(DB::FieldVisitorToString(), 
literal->value); - type = type_cast(literal->value.getType()); - is_array |= type & Type::a; - } - auto subfunc = std::dynamic_pointer_cast(arg); - if (subfunc) - { - FuncHandler f; - auto arg_func_name = std::dynamic_pointer_cast(arg)->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - FuncRet ret = f(arg, columns); - is_array |= ret.is_array; - type = ret.type; - value = ret.value; - if (value.empty()) - no_indent = false; - } - if (!value.empty()) - { - if (type == Type::i) - { - values.insert(value); - values.insert(value + " + " + randomInteger(1, 10)); - values.insert(value + " - " + randomInteger(1, 10)); - } - if (type == Type::f) - { - values.insert(value); - values.insert(value + " + " + randomFloat(1, 10)); - values.insert(value + " - " + randomFloat(1, 10)); - } - if (type & Type::s || type & Type::d || type & Type::dt) - { - if (type == Type::s) - type = time_type(value); - if (type == Type::s) - values.insert(value); - if (type & Type::d) - { - values.insert(value); - values.insert("toDate(" + value + ") + " + randomInteger(1, 10)); - values.insert("toDate(" + value + ") - " + randomInteger(1, 10)); - } - else if (type & Type::dt) - { - values.insert(value); - values.insert( - "toDateTime(" + value + ") + " + randomInteger(1, 10000)); - values.insert( - "toDateTime(" + value + ") - " + randomInteger(1, 10000)); - } - } - } - if (func_args_same_types.contains(boost::algorithm::to_lower_copy(x->name))) - type_value &= type; - } - for (const auto & indent : indents) - { - auto c = Column(indent); - c.type = type_value; - c.is_array = is_array; - if (func_args_same_types.contains( - boost::algorithm::to_lower_copy(x->name))) - c.values = values; - for (const auto & ind : indents) - if (ind != indent) - c.equals.insert(std::make_pair("", ind)); - - if (columns.contains(indent)) - columns[indent].merge(c); - else - columns[indent] = c; - } - if 
(func_to_return_type.contains(boost::algorithm::to_lower_copy(x->name))) - { - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - formatAST(*ch, buf); - auto r = func_to_return_type[boost::algorithm::to_lower_copy(x->name)]; - r.value = buf.str(); - return r; - } - return func_to_return_type[boost::algorithm::to_lower_copy(x->name)]; - } - else if (func_to_param_type.contains( - boost::algorithm::to_lower_copy(x->name))) - { - if (no_indent) - { - DB::WriteBufferFromOwnString buf; - formatAST(*ch, buf); - return FuncRet( - func_to_param_type[boost::algorithm::to_lower_copy(x->name)], - buf.str()); - } - return FuncRet( - func_to_param_type[boost::algorithm::to_lower_copy(x->name)], - ""); - } - } - return FuncRet(); -} - -void processFunc(DB::ASTPtr ch, std::map & columns) -{ - auto x = std::dynamic_pointer_cast(ch); - if (x) - { - FuncHandler f; - auto arg_func_name = x->name; - if (handlers.contains(arg_func_name)) - f = handlers[arg_func_name]; - else - f = handlers[""]; - f(ch, columns); - } - else - { - for (const auto & child : (*ch).children) - processFunc(child, columns); - } -} - - -std::set getIndent(DB::ASTPtr ch) -{ - if (!ch) - return {}; - - std::set ret = {}; - auto x = std::dynamic_pointer_cast(ch); - if (x) - ret.insert(x->name()); - for (const auto & child : (*ch).children) - { - auto child_ind = getIndent(child); - ret.insert(child_ind.begin(), child_ind.end()); - } - return ret; -} - - -std::set getSelectIndent( - DB::ASTPtr asp, - std::set & column_alias) -{ - std::set ret = {}; - for (auto & ch : asp->children) - { - auto alias = getAlias(ch); - auto columns = getIndent(ch); - if (alias.empty()) - column_alias.insert(alias); - ret.insert(columns.begin(), columns.end()); - } - return ret; -} - - -std::set -connectedEqualityFind( - const Column & now, - std::map & columns_descriptions, - std::set & visited) -{ - std::set result; - for (const auto & column : now.equals) - if (!visited.contains(column)) - { - visited.insert(column); - auto 
sub_r = connectedEqualityFind( - columns_descriptions[column.first + "." + column.second], - columns_descriptions, - visited); - result.insert(sub_r.begin(), sub_r.end()); - } - result.insert(now.name); - return result; -} - - -std::map -unificateColumns( - std::map columns_descriptions, - const TableList & all_tables) -{ - for (auto & column : columns_descriptions) - { - std::set changed_equals; - for (const auto & eq : column.second.equals) - { - std::string t, c; - std::tie(t, c) = all_tables.getTable(eq.second); - changed_equals.insert(std::make_pair(t, c)); - } - column.second.equals = changed_equals; - } - std::map result; - for (auto & column : columns_descriptions) - { - std::string t, c; - std::tie(t, c) = all_tables.getTable(column.first); - column.second.name = std::make_pair(t, c); - result[t + "." + c].merge(column.second); - } - std::set visited; - for (auto & column : result) - if (!visited.contains(column.second.name)) - { - auto equal = connectedEqualityFind( - result[column.second.name.first + "." + column.second.name.second], - result, - visited); - for (const auto & c : equal) - result[c.first + "." + c.second].equals = equal; - } - for (auto & column : result) - for (const auto & e : column.second.equals) - column.second.merge(result[e.first + "." + e.second]); - - for (auto & column : result) - { - column.second.unifyType(); - if (column.second.generateValues()) - for (const auto & e : column.second.equals) - result[e.first + "." 
+ e.second].merge(column.second); - - } - return result; -} - -std::vector getSelect(DB::ASTPtr vertex) -{ - auto z = std::dynamic_pointer_cast(vertex); - std::vector result; - if (z) - { - result.push_back(vertex); - return result; - } - - for (const auto & child : (*vertex).children) - { - auto v = getSelect(child); - result.insert(result.end(), v.begin(), v.end()); - } - return result; -} - - -void parseSelectQuery(DB::ASTPtr ast, TableList & all_tables) -{ - if (!ast) - throw std::runtime_error("Bad ASTPtr in parseSelectQuery" + StackTrace().toString()); - - auto select_ast = std::dynamic_pointer_cast(ast); - if (!select_ast) - { - std::cerr << "not select query"; - return; - } - std::set columns = {}; - - auto x = select_ast->tables(); - if (!x) - throw std::runtime_error("There is no tables in query. Nothing to generate."); - - for (auto & child : x->children) - { - auto ch = std::dynamic_pointer_cast(child); - auto table_expression_ast = std::dynamic_pointer_cast(ch->table_expression); - if (table_expression_ast && table_expression_ast->database_and_table_name) - { - auto table_name = *(getIndent(table_expression_ast->database_and_table_name).begin()); - all_tables.addTable(table_name); - auto alias = getAlias(ch); - if (!alias.empty()) - all_tables.aliases[alias] = table_name; - } - if (table_expression_ast && table_expression_ast->subquery) - { - for (const auto & select : getSelect(table_expression_ast->subquery)) - { - TableList local; - parseSelectQuery(select, local); - all_tables.merge(local); - } - } - - if (ch->table_join) - { - auto jch = std::dynamic_pointer_cast(ch->table_join); - if (jch->using_expression_list) - { - auto join_columns = getIndent(jch->using_expression_list); - columns.insert(join_columns.begin(), join_columns.end()); - } - else if (jch->on_expression) - { - auto join_columns = getIndent(jch->on_expression); - columns.insert(join_columns.begin(), join_columns.end()); - } - } - } - - std::set column_aliases; - auto select_columns 
= getSelectIndent(select_ast->select(), column_aliases); - columns.insert(select_columns.begin(), select_columns.end()); - - auto where_columns = getIndent(select_ast->where()); - columns.insert(where_columns.begin(), where_columns.end()); - - auto groupby_columns = getIndent(select_ast->groupBy()); - columns.insert(groupby_columns.begin(), groupby_columns.end()); - - auto orderby_columns = getIndent(select_ast->orderBy()); - columns.insert(orderby_columns.begin(), orderby_columns.end()); - - auto having_columns = getIndent(select_ast->having()); - columns.insert(having_columns.begin(), having_columns.end()); - - std::map columns_descriptions; - processFunc(ast, columns_descriptions); - - for (const auto & column : columns) - if (!column_aliases.contains(column)) - { - if (!columns_descriptions.contains(column)) - columns_descriptions[column] = Column(column); - all_tables.addColumn(column); - } - - columns_descriptions = unificateColumns(columns_descriptions, all_tables); - for (auto & column : columns_descriptions) - all_tables.addDescription(column.second); -} - - -TableList getTablesFromSelect(std::vector queries) -{ - TableList result; - for (std::string & query : queries) - { - DB::ParserQueryWithOutput parser(query.data() + query.size()); - DB::ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); - for (auto & select : getSelect(ast)) - { - TableList local; - parseSelectQuery(select, local); - result.merge(local); - } - } - return result; -} - -int main(int argc, const char *argv[]) -{ - try - { - po::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "Display greeting and allowed options.") - ("input,i", po::value(), "Input filename.") - ("output,o", po::value(), "Output filename."); - - po::variables_map vm; - po::store(po::parse_command_line(argc, argv, desc), vm); - po::notify(vm); - - if (vm.count("help") || vm.count("h")) - { - std::cout << "Hello! 
It is datasets generator for ClickHouse's queries." << std::endl; - std::cout << "Put some query as an input and it will produce queries for table creating and filling." << std::endl; - std::cout << "After that your query could be executed on this tables." << std::endl; - std::cout << desc << std::endl; - return 1; - } - if (vm.count("input")) - if (!freopen(vm["input"].as().c_str(), "r", stdin)) - std::cout << "Error while input." << std::endl; - if (vm.count("output")) - if (!freopen(vm["output"].as().c_str(), "w", stdout)) - std::cout << "Error while output." << std::endl; - if (vm.empty()) - std::cout << "Copy your queries (with semicolons) here, press Enter and Ctrl+D." << std::endl; - } - catch (...) - { - std::cerr << "Got error while parse command line arguments: " << DB::getCurrentExceptionMessage(true) << std::endl; - throw; - } - - handlers["plus"] = arithmeticFunc; - handlers["minus"] = arithmeticFunc; - handlers["like"] = likeFunc; - handlers["array"] = arrayFunc; - handlers["in"] = inFunc; - handlers[""] = simpleFunc; - - std::vector queries; - std::string in; - std::string query{}; - while (getline(std::cin, in)) - { - /// Skip comments - if (in.find("--") != std::string::npos) - continue; - - query += in + " "; - - if (in.find(';') != std::string::npos) - { - queries.push_back(query); - query = ""; - } - } - - try - { - auto result = getTablesFromSelect(queries); - - for (auto & table : result.tables) - { - std::cout << table.second.createQuery(); - std::cout << table.second.insertQuery(); - } - - for (auto & q: queries) - std::cout << q << std::endl; - } - catch (std::string & e) - { - std::cerr << "Exception: " << e << std::endl; - } -} diff --git a/utils/iotest/CMakeLists.txt b/utils/iotest/CMakeLists.txt deleted file mode 100644 index 356986eb493..00000000000 --- a/utils/iotest/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ - -clickhouse_add_executable (iotest iotest.cpp ${SRCS}) -target_link_libraries (iotest PRIVATE clickhouse_common_io) - 
-clickhouse_add_executable (iotest_nonblock iotest_nonblock.cpp ${SRCS}) -target_link_libraries (iotest_nonblock PRIVATE clickhouse_common_io) - -clickhouse_add_executable (iotest_aio iotest_aio.cpp ${SRCS}) -target_link_libraries (iotest_aio PRIVATE clickhouse_common_io) diff --git a/utils/iotest/iotest.cpp b/utils/iotest/iotest.cpp deleted file mode 100644 index 7a1f35ddd52..00000000000 --- a/utils/iotest/iotest.cpp +++ /dev/null @@ -1,197 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - } -} - - -enum Mode -{ - MODE_NONE = 0, - MODE_READ = 1, - MODE_WRITE = 2, - MODE_ALIGNED = 4, - MODE_DIRECT = 8, - MODE_SYNC = 16, -}; - - -void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t count) -{ - using namespace DB; - - Memory<> direct_buf(block_size, ::getPageSize()); - std::vector simple_buf(block_size); - - char * buf; - if ((mode & MODE_DIRECT)) - buf = direct_buf.data(); - else - buf = simple_buf.data(); - - pcg64 rng(randomSeed()); - - for (size_t i = 0; i < count; ++i) - { - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset; - if ((mode & MODE_DIRECT) || (mode & MODE_ALIGNED)) - offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - else - offset = min_offset + rand_result % (max_offset - min_offset - block_size + 1); - - if (mode & MODE_READ) - { - if (static_cast(block_size) != pread(fd, buf, block_size, offset)) - throwFromErrno("Cannot read", 
ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - else - { - if (static_cast(block_size) != pwrite(fd, buf, block_size, offset)) - throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); - } - } -} - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - int mode = MODE_NONE; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 threads = 0; - UInt64 count = 0; - - if (argc != 8) - { - std::cerr << "Usage: " << argv[0] << " file_name (r|w)[a][d][s] min_offset max_offset block_size threads count" << std::endl << - "a - aligned, d - direct, s - sync" << std::endl; - return 1; - } - - file_name = argv[1]; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - threads = parse(argv[6]); - count = parse(argv[7]); - - for (int i = 0; argv[2][i]; ++i) - { - char c = argv[2][i]; - switch (c) - { - case 'r': - mode |= MODE_READ; - break; - case 'w': - mode |= MODE_WRITE; - break; - case 'a': - mode |= MODE_ALIGNED; - break; - case 'd': - mode |= MODE_DIRECT; - break; - case 's': - mode |= MODE_SYNC; - break; - default: - throw Poco::Exception("Invalid mode"); - } - } - - ThreadPool pool(threads); - - #ifndef OS_DARWIN - int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_DIRECT) ? O_DIRECT : 0) | ((mode & MODE_SYNC) ? O_SYNC : 0)); - #else - int fd = open(file_name, ((mode & MODE_READ) ? O_RDONLY : O_WRONLY) | ((mode & MODE_SYNC) ? 
O_SYNC : 0)); - #endif - if (-1 == fd) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - #ifdef OS_DARWIN - if (mode & MODE_DIRECT) - if (fcntl(fd, F_NOCACHE, 1) == -1) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_CLOSE_FILE); - #endif - Stopwatch watch; - - for (size_t i = 0; i < threads; ++i) - pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, count); }); - pool.wait(); - - #if defined(OS_DARWIN) - fsync(fd); - #else - fdatasync(fd); - #endif - - watch.stop(); - - if (0 != close(fd)) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " * " << threads << " ops"; - if (mode & MODE_ALIGNED) - std::cout << " (aligned)"; - if (mode & MODE_DIRECT) - std::cout << " (direct)"; - if (mode & MODE_SYNC) - std::cout << " (sync)"; - std::cout << " in " << watch.elapsedSeconds() << " sec." - << ", " << count * threads / watch.elapsedSeconds() << " ops/sec." - << ", " << count * threads * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." 
- << std::endl; - - return 0; -} - - -int main(int argc, char ** argv) -{ - try - { - return mainImpl(argc, argv); - } - catch (const Poco::Exception & e) - { - std::cerr << e.what() << ", " << e.message() << std::endl; - return 1; - } -} diff --git a/utils/iotest/iotest_aio.cpp b/utils/iotest/iotest_aio.cpp deleted file mode 100644 index c0cf002ce58..00000000000 --- a/utils/iotest/iotest_aio.cpp +++ /dev/null @@ -1,203 +0,0 @@ -#if !defined(OS_LINUX) -int main(int, char **) { return 0; } -#else - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_IO_SUBMIT; - extern const int CANNOT_IO_GETEVENTS; - } -} - - -enum Mode -{ - MODE_READ = 1, - MODE_WRITE = 2, -}; - - -void thread(int fd, int mode, size_t min_offset, size_t max_offset, size_t block_size, size_t buffers_count, size_t count) -{ - using namespace DB; - - AIOContext ctx; - - std::vector> buffers(buffers_count); - for (size_t i = 0; i < buffers_count; ++i) - buffers[i] = Memory<>(block_size, ::getPageSize()); - - pcg64_fast rng(randomSeed()); - - size_t in_progress = 0; - size_t blocks_sent = 0; - std::vector buffer_used(buffers_count, false); - std::vector iocbs(buffers_count); - std::vector query_cbs; - std::vector events(buffers_count); - - while (blocks_sent < count || in_progress > 0) - { - /// Prepare queries. 
- query_cbs.clear(); - for (size_t i = 0; i < buffers_count; ++i) - { - if (blocks_sent >= count || in_progress >= buffers_count) - break; - - if (buffer_used[i]) - continue; - - buffer_used[i] = true; - ++blocks_sent; - ++in_progress; - - char * buf = buffers[i].data(); - - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - - iocb & cb = iocbs[i]; - memset(&cb, 0, sizeof(cb)); - cb.aio_buf = reinterpret_cast(buf); - cb.aio_fildes = fd; - cb.aio_nbytes = block_size; - cb.aio_offset = offset; - cb.aio_data = static_cast(i); - - if (mode == MODE_READ) - { - cb.aio_lio_opcode = IOCB_CMD_PREAD; - } - else - { - cb.aio_lio_opcode = IOCB_CMD_PWRITE; - } - - query_cbs.push_back(&cb); - } - - /// Send queries. - if (io_submit(ctx.ctx, query_cbs.size(), query_cbs.data()) < 0) - throwFromErrno("io_submit failed", ErrorCodes::CANNOT_IO_SUBMIT); - - /// Receive answers. If we have something else to send, then receive at least one answer (after that send them), otherwise wait all answers. - memset(events.data(), 0, buffers_count * sizeof(events[0])); - int evs = io_getevents(ctx.ctx, (blocks_sent < count ? 
1 : in_progress), buffers_count, events.data(), nullptr); - if (evs < 0) - throwFromErrno("io_getevents failed", ErrorCodes::CANNOT_IO_GETEVENTS); - - for (int i = 0; i < evs; ++i) - { - int b = static_cast(events[i].data); - if (events[i].res != static_cast(block_size)) - throw Poco::Exception("read/write error"); - --in_progress; - buffer_used[b] = false; - } - } -} - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - int mode = MODE_READ; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 buffers_count = 0; - UInt64 threads_count = 0; - UInt64 count = 0; - - if (argc != 9) - { - std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size threads buffers count" << std::endl; - return 1; - } - - file_name = argv[1]; - if (argv[2][0] == 'w') - mode = MODE_WRITE; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - threads_count = parse(argv[6]); - buffers_count = parse(argv[7]); - count = parse(argv[8]); - - int fd = open(file_name, ((mode == MODE_READ) ? O_RDONLY : O_WRONLY) | O_DIRECT); - if (-1 == fd) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - - ThreadPool pool(threads_count); - - Stopwatch watch; - - for (size_t i = 0; i < threads_count; ++i) - pool.scheduleOrThrowOnError([=]{ thread(fd, mode, min_offset, max_offset, block_size, buffers_count, count); }); - pool.wait(); - - watch.stop(); - - if (0 != close(fd)) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " * " << threads_count << " ops"; - std::cout << " in " << watch.elapsedSeconds() << " sec." - << ", " << count * threads_count / watch.elapsedSeconds() << " ops/sec." - << ", " << count * threads_count * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." 
- << std::endl; - - return 0; -} - - -int main(int argc, char ** argv) -{ - try - { - return mainImpl(argc, argv); - } - catch (const Poco::Exception & e) - { - std::cerr << e.what() << ", " << e.message() << std::endl; - return 1; - } -} -#endif diff --git a/utils/iotest/iotest_nonblock.cpp b/utils/iotest/iotest_nonblock.cpp deleted file mode 100644 index 32c86282743..00000000000 --- a/utils/iotest/iotest_nonblock.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#if defined (OS_LINUX) -# include -#endif - - -namespace DB -{ - namespace ErrorCodes - { - extern const int CANNOT_OPEN_FILE; - extern const int CANNOT_CLOSE_FILE; - extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_FSYNC; - extern const int SYSTEM_ERROR; - } -} - - -enum Mode -{ - MODE_READ, - MODE_WRITE, -}; - - -int mainImpl(int argc, char ** argv) -{ - using namespace DB; - - const char * file_name = nullptr; - Mode mode = MODE_READ; - UInt64 min_offset = 0; - UInt64 max_offset = 0; - UInt64 block_size = 0; - UInt64 descriptors = 0; - UInt64 count = 0; - - if (argc != 8) - { - std::cerr << "Usage: " << argv[0] << " file_name r|w min_offset max_offset block_size descriptors count" << std::endl; - return 1; - } - - file_name = argv[1]; - min_offset = parse(argv[3]); - max_offset = parse(argv[4]); - block_size = parse(argv[5]); - descriptors = parse(argv[6]); - count = parse(argv[7]); - - if (!strcmp(argv[2], "r")) - mode = MODE_READ; - else if (!strcmp(argv[2], "w")) - mode = MODE_WRITE; - else - throw Poco::Exception("Invalid mode"); - - std::vector fds(descriptors); - for (size_t i = 0; i < descriptors; ++i) - { - fds[i] = open(file_name, O_SYNC | ((mode == MODE_READ) ? 
O_RDONLY : O_WRONLY)); - if (-1 == fds[i]) - throwFromErrno("Cannot open file", ErrorCodes::CANNOT_OPEN_FILE); - } - - std::vector buf(block_size); - - pcg64 rng(randomSeed()); - - Stopwatch watch; - - std::vector polls(descriptors); - - for (size_t i = 0; i < descriptors; ++i) - { - polls[i].fd = fds[i]; - polls[i].events = (mode == MODE_READ) ? POLLIN : POLLOUT; - polls[i].revents = 0; - } - - size_t ops = 0; - while (ops < count) - { - if (poll(polls.data(), static_cast(descriptors), -1) <= 0) - throwFromErrno("poll failed", ErrorCodes::SYSTEM_ERROR); - for (size_t i = 0; i < descriptors; ++i) - { - if (!polls[i].revents) - continue; - - if (polls[i].revents != polls[i].events) - throw Poco::Exception("revents indicates error"); - polls[i].revents = 0; - ++ops; - - uint64_t rand_result1 = rng(); - uint64_t rand_result2 = rng(); - uint64_t rand_result3 = rng(); - - size_t rand_result = rand_result1 ^ (rand_result2 << 22) ^ (rand_result3 << 43); - size_t offset; - offset = min_offset + rand_result % ((max_offset - min_offset) / block_size) * block_size; - - if (mode == MODE_READ) - { - if (static_cast(block_size) != pread(fds[i], buf.data(), block_size, offset)) - throwFromErrno("Cannot read", ErrorCodes::CANNOT_READ_FROM_FILE_DESCRIPTOR); - } - else - { - if (static_cast(block_size) != pwrite(fds[i], buf.data(), block_size, offset)) - throwFromErrno("Cannot write", ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR); - } - } - } - - for (size_t i = 0; i < descriptors; ++i) - { -#if defined(OS_DARWIN) - if (fsync(fds[i])) - throwFromErrno("Cannot fsync", ErrorCodes::CANNOT_FSYNC); -#else - if (fdatasync(fds[i])) - throwFromErrno("Cannot fdatasync", ErrorCodes::CANNOT_FSYNC); -#endif - } - - watch.stop(); - - for (size_t i = 0; i < descriptors; ++i) - { - if (0 != close(fds[i])) - throwFromErrno("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); - } - - std::cout << std::fixed << std::setprecision(2) - << "Done " << count << " ops" << " in " << watch.elapsedSeconds() << 
" sec." - << ", " << count / watch.elapsedSeconds() << " ops/sec." - << ", " << count * block_size / watch.elapsedSeconds() / 1000000 << " MB/sec." - << std::endl; - - return 0; -} - - -int main(int argc, char ** argv) -{ - try - { - return mainImpl(argc, argv); - } - catch (const Poco::Exception & e) - { - std::cerr << e.what() << ", " << e.message() << std::endl; - return 1; - } -} diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt b/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt deleted file mode 100644 index b63373bacf7..00000000000 --- a/utils/zookeeper-adjust-block-numbers-to-parts/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -clickhouse_add_executable (zookeeper-adjust-block-numbers-to-parts main.cpp ${SRCS}) -target_compile_options(zookeeper-adjust-block-numbers-to-parts PRIVATE -Wno-format) -target_link_libraries (zookeeper-adjust-block-numbers-to-parts PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp deleted file mode 100644 index 7736921a9c6..00000000000 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ /dev/null @@ -1,286 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - - -std::vector getAllShards(zkutil::ZooKeeper & zk, const std::string & root) -{ - return zk.getChildren(root); -} - - -std::vector removeNotExistingShards(zkutil::ZooKeeper & zk, const std::string & root, const std::vector & shards) -{ - auto existing_shards = getAllShards(zk, root); - std::vector filtered_shards; - filtered_shards.reserve(shards.size()); - for (const auto & shard : shards) - if (std::find(existing_shards.begin(), existing_shards.end(), shard) == existing_shards.end()) - std::cerr << "Shard " << shard << " not found." 
<< std::endl; - else - filtered_shards.emplace_back(shard); - return filtered_shards; -} - - -std::vector getAllTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard) -{ - return zk.getChildren(root + "/" + shard); -} - - -std::vector removeNotExistingTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard, const std::vector & tables) -{ - auto existing_tables = getAllTables(zk, root, shard); - std::vector filtered_tables; - filtered_tables.reserve(tables.size()); - for (const auto & table : tables) - if (std::find(existing_tables.begin(), existing_tables.end(), table) == existing_tables.end()) - std::cerr << "\tTable " << table << " not found on shard " << shard << "." << std::endl; - else - filtered_tables.emplace_back(table); - return filtered_tables; -} - - -Int64 getMaxBlockNumberForPartition(zkutil::ZooKeeper & zk, - const std::string & replica_path, - const std::string & partition_name, - const DB::MergeTreeDataFormatVersion & format_version) -{ - auto replicas_path = replica_path + "/replicas"; - auto replica_hosts = zk.getChildren(replicas_path); - Int64 max_block_num = 0; - for (const auto & replica_host : replica_hosts) - { - auto parts = zk.getChildren(replicas_path + "/" + replica_host + "/parts"); - for (const auto & part : parts) - { - try - { - auto info = DB::MergeTreePartInfo::fromPartName(part, format_version); - if (info.partition_id == partition_name) - max_block_num = std::max(info.max_block, max_block_num); - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", Part " << part << "skipped." 
<< std::endl; - } - } - } - return max_block_num; -} - - -Int64 getCurrentBlockNumberForPartition(zkutil::ZooKeeper & zk, const std::string & part_path) -{ - Coordination::Stat stat; - zk.get(part_path, &stat); - - /// References: - /// https://stackoverflow.com/a/10347910 - /// https://bowenli86.github.io/2016/07/07/distributed%20system/zookeeper/How-does-ZooKeeper-s-persistent-sequential-id-work/ - return (stat.cversion + stat.numChildren) / 2; -} - - -std::unordered_map getPartitionsNeedAdjustingBlockNumbers( - zkutil::ZooKeeper & zk, const std::string & root, const std::vector & shards, const std::vector & tables) -{ - std::unordered_map result; - - std::vector use_shards = shards.empty() ? getAllShards(zk, root) : removeNotExistingShards(zk, root, shards); - - for (const auto & shard : use_shards) - { - std::cout << "Shard: " << shard << std::endl; - std::vector use_tables = tables.empty() ? getAllTables(zk, root, shard) : removeNotExistingTables(zk, root, shard, tables); - - for (const auto & table : use_tables) - { - std::cout << "\tTable: " << table << std::endl; - std::string table_path = root + "/" + shard + "/" + table; - std::string blocks_path = table_path + "/block_numbers"; - - std::vector partitions; - DB::MergeTreeDataFormatVersion format_version; - try - { - format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; - partitions = zk.getChildren(blocks_path); - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", table " << table << " skipped." 
<< std::endl; - continue; - } - - for (const auto & partition : partitions) - { - try - { - std::string part_path = blocks_path + "/" + partition; - Int64 partition_max_block = getMaxBlockNumberForPartition(zk, table_path, partition, format_version); - Int64 current_block_number = getCurrentBlockNumberForPartition(zk, part_path); - if (current_block_number < partition_max_block + 1) - { - std::cout << "\t\tPartition: " << partition << ": current block_number: " << current_block_number - << ", max block number: " << partition_max_block << ". Adjusting is required." << std::endl; - result.emplace(part_path, partition_max_block); - } - } - catch (const DB::Exception & ex) - { - std::cerr << ex.displayText() << ", partition " << partition << " skipped." << std::endl; - } - } - } - } - return result; -} - - -void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int64 new_current_block_number) -{ - Int64 current_block_number = getCurrentBlockNumberForPartition(zk, path); - - auto create_ephemeral_nodes = [&](size_t count) - { - std::string block_prefix = path + "/block-"; - Coordination::Requests requests; - requests.reserve(count); - for (size_t i = 0; i != count; ++i) - requests.emplace_back(zkutil::makeCreateRequest(block_prefix, "", zkutil::CreateMode::EphemeralSequential)); - auto responses = zk.multi(requests); - - std::vector paths_created; - paths_created.reserve(responses.size()); - for (const auto & response : responses) - { - const auto * create_response = dynamic_cast(response.get()); - if (!create_response) - { - std::cerr << "\tCould not create ephemeral node " << block_prefix << std::endl; - return false; - } - paths_created.emplace_back(create_response->path_created); - } - - std::sort(paths_created.begin(), paths_created.end()); - for (const auto & path_created : paths_created) - { - Int64 number = DB::parse(path_created.c_str() + block_prefix.size(), path_created.size() - block_prefix.size()); - if (number != current_block_number) 
- { - char suffix[11] = ""; - size_t size = sprintf(suffix, "%010lld", current_block_number); - std::string expected_path = block_prefix + std::string(suffix, size); - std::cerr << "\t" << path_created << ": Ephemeral node has been created with an unexpected path (expected something like " - << expected_path << ")." << std::endl; - return false; - } - std::cout << "\t" << path_created << std::endl; - ++current_block_number; - } - - return true; - }; - - if (current_block_number >= new_current_block_number) - return; - - std::cout << "Creating ephemeral sequential nodes:" << std::endl; - create_ephemeral_nodes(1); /// Firstly try to create just a single node. - - /// Create other nodes in batches of 50 nodes. - while (current_block_number + 50 <= new_current_block_number) // NOLINT: clang-tidy thinks that the loop is infinite - create_ephemeral_nodes(50); - - create_ephemeral_nodes(new_current_block_number - current_block_number); -} - - -int main(int argc, char ** argv) -try -{ - /// Parse the command line. - namespace po = boost::program_options; - po::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "show help") - ("zookeeper,z", po::value(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.clickhouse.com:2181") - ("path,p", po::value(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") - ("shard,s", po::value(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") - ("table,t", po::value(), "[optional] Tables to process, comma-separated. 
If not specified then the utility will process all the tables.") - ("dry-run", "[optional] Specify if you want this utility just to analyze block numbers without any changes."); - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - auto show_usage = [&] - { - std::cout << "Usage: " << std::endl; - std::cout << " " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - }; - - if (options.count("help") || (argc == 1)) - { - std::cout << "This utility adjusts the /block_numbers zookeeper nodes to the correct block number in partition." << std::endl; - std::cout << "It might be useful when incorrect block numbers stored in zookeeper don't allow you to insert data into a table or drop/detach a partition." << std::endl; - show_usage(); - return 0; - } - - if (!options.count("zookeeper")) - { - std::cerr << "Option --zookeeper should be set." << std::endl; - show_usage(); - return 1; - } - - std::string root = options.count("path") ? options.at("path").as() : "/clickhouse/tables"; - - std::vector shards, tables; - if (options.count("shard")) - boost::split(shards, options.at("shard").as(), boost::algorithm::is_any_of(",")); - if (options.count("table")) - boost::split(tables, options.at("table").as(), boost::algorithm::is_any_of(",")); - - /// Check if the adjusting of the block numbers is required. - std::cout << "Checking if adjusting of the block numbers is required:" << std::endl; - zkutil::ZooKeeper zookeeper(options.at("zookeeper").as()); - auto part_paths_with_max_block_numbers = getPartitionsNeedAdjustingBlockNumbers(zookeeper, root, shards, tables); - - if (part_paths_with_max_block_numbers.empty()) - { - std::cout << "No adjusting required." << std::endl; - return 0; - } - - std::cout << "Required adjusting of " << part_paths_with_max_block_numbers.size() << " block numbers." << std::endl; - - /// Adjust the block numbers. 
- if (options.count("dry-run")) - { - std::cout << "This is a dry-run, exiting." << std::endl; - return 0; - } - - std::cout << std::endl << "Adjusting the block numbers:" << std::endl; - for (const auto & [part_path, max_block_number] : part_paths_with_max_block_numbers) - setCurrentBlockNumber(zookeeper, part_path, max_block_number + 1); - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} diff --git a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt b/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt deleted file mode 100644 index 4c7a9ba9560..00000000000 --- a/utils/zookeeper-create-entry-to-download-part/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (zookeeper-create-entry-to-download-part main.cpp ${SRCS}) -target_link_libraries (zookeeper-create-entry-to-download-part PRIVATE dbms clickhouse_common_zookeeper boost::program_options) diff --git a/utils/zookeeper-create-entry-to-download-part/main.cpp b/utils/zookeeper-create-entry-to-download-part/main.cpp deleted file mode 100644 index b92857929b7..00000000000 --- a/utils/zookeeper-create-entry-to-download-part/main.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include - - -int main(int argc, char ** argv) -try -{ - boost::program_options::options_description desc("Allowed options"); - desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value()->required(), - "addresses of ZooKeeper instances, comma separated. 
Example: example01e.clickhouse.com:2181") - ("path,p", boost::program_options::value()->required(), "path of replica queue to insert node (without trailing slash)") - ("name,n", boost::program_options::value()->required(), "name of part to download") - ; - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Insert log entry to replication queue to download part from any replica." << std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; - std::cout << desc << std::endl; - return 1; - } - - std::string path = options.at("path").as(); - std::string name = options.at("name").as(); - - zkutil::ZooKeeper zookeeper(options.at("address").as()); - - DB::ReplicatedMergeTreeLogEntry entry; - entry.type = DB::ReplicatedMergeTreeLogEntry::MERGE_PARTS; - entry.source_parts = {name}; - entry.new_part_name = name; - - zookeeper.create(path + "/queue-", entry.toString(), zkutil::CreateMode::PersistentSequential); - return 0; -} -catch (...) 
-{ - std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; - throw; -} From e3f5230822a5761c50b2560121e3c5cd8a2c0bd0 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Sat, 5 Nov 2022 01:10:51 +0000 Subject: [PATCH 35/47] Fix bug in CAST function parser --- src/Parsers/ExpressionListParsers.cpp | 5 ++++- .../queries/0_stateless/02476_fix_cast_parser_bug.reference | 0 tests/queries/0_stateless/02476_fix_cast_parser_bug.sql | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02476_fix_cast_parser_bug.reference create mode 100644 tests/queries/0_stateless/02476_fix_cast_parser_bug.sql diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index c362340d013..2a41196c15d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -1197,6 +1197,9 @@ public: if (!mergeElement()) return false; + if (elements.size() != 2) + return false; + elements = {makeASTFunction("CAST", elements[0], elements[1])}; finished = true; return true; @@ -1406,7 +1409,7 @@ public: protected: bool getResultImpl(ASTPtr & node) override { - if (state == 2) + if (state == 2 && elements.size() == 2) std::swap(elements[1], elements[0]); node = makeASTFunction("position", std::move(elements)); diff --git a/tests/queries/0_stateless/02476_fix_cast_parser_bug.reference b/tests/queries/0_stateless/02476_fix_cast_parser_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql b/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql new file mode 100644 index 00000000000..6b01b3a8c0b --- /dev/null +++ b/tests/queries/0_stateless/02476_fix_cast_parser_bug.sql @@ -0,0 +1 @@ +SELECT CAST(a, b -> c) ++; -- { clientError SYNTAX_ERROR } From 73dfe4acd7d527be1ed5698b2bf2db83bc81bc0a Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 5 Nov 2022 17:56:55 +0800 Subject: [PATCH 36/47] Add to default white 
list --- docs/en/operations/clickhouse-keeper.md | 4 ++-- src/Coordination/CoordinationSettings.cpp | 2 +- src/Coordination/FourLetterCommand.cpp | 2 +- src/Coordination/KeeperServer.cpp | 7 ++++--- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 17cf3ade6ab..cf2964b1a7c 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -126,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively. -The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`. +The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`. You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port. @@ -309,7 +309,7 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Fail to scheduled snapshot creation task.` if failed. +- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. 
``` 100 diff --git a/src/Coordination/CoordinationSettings.cpp b/src/Coordination/CoordinationSettings.cpp index 3e03ee0d6f4..08b8668a3ab 100644 --- a/src/Coordination/CoordinationSettings.cpp +++ b/src/Coordination/CoordinationSettings.cpp @@ -36,7 +36,7 @@ void CoordinationSettings::loadFromConfig(const String & config_elem, const Poco } -const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv"; +const String KeeperConfigurationAndSettings::DEFAULT_FOUR_LETTER_WORD_CMD = "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif"; KeeperConfigurationAndSettings::KeeperConfigurationAndSettings() : server_id(NOT_EXIST) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 402270640d2..82123dc8218 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -481,7 +481,7 @@ String ApiVersionCommand::run() String CreateSnapshotCommand::run() { auto log_index = keeper_dispatcher.createSnapshot(); - return log_index > 0 ? std::to_string(log_index) : "Fail to scheduled snapshot creation task."; + return log_index > 0 ? 
std::to_string(log_index) : "Failed to schedule snapshot creation task."; } String LogInfoCommand::run() diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 5b2659e9a1b..487d0dc4cc3 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -913,15 +913,16 @@ uint64_t KeeperServer::createSnapshot() if (log_idx != 0) LOG_INFO(log, "Snapshot creation scheduled with last committed log index {}.", log_idx); else - LOG_WARNING(log, "Fail to scheduled snapshot creation task."); + LOG_WARNING(log, "Failed to schedule snapshot creation task."); return log_idx; } KeeperLogInfo KeeperServer::getKeeperLogInfo() { KeeperLogInfo log_info; - log_info.first_log_idx = state_manager->load_log_store()->start_index(); - log_info.first_log_term = state_manager->load_log_store()->term_at(log_info.first_log_idx); + auto log_store = state_manager->load_log_store(); + log_info.first_log_idx = log_store->start_index(); + log_info.first_log_term = log_store->term_at(log_info.first_log_idx); log_info.last_log_idx = raft_instance->get_last_log_idx(); log_info.last_log_term = raft_instance->get_last_log_term(); log_info.last_committed_log_idx = raft_instance->get_committed_log_idx(); From 00e93482c85df1ecaa1bbb66bb6ee4e2e6ea966e Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 5 Nov 2022 18:00:26 +0800 Subject: [PATCH 37/47] fix docs --- docs/en/operations/clickhouse-keeper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index cf2964b1a7c..0324f742988 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -309,7 +309,7 @@ Sessions with Ephemerals (1): /clickhouse/task_queue/ddl ``` -- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. 
+- `csnp`: Schedule a snapshot creation task. Return the last committed log index of the scheduled snapshot if success or `Failed to schedule snapshot creation task.` if failed. Note that `lgif` command can help you determine whether the snapshot is done. ``` 100 From b0b3942aae0452774b229ecfda1dc357ef85ddaf Mon Sep 17 00:00:00 2001 From: Rami Dridi Date: Sat, 5 Nov 2022 21:33:47 +0100 Subject: [PATCH 38/47] docs : updating comparing mode command and results --- .../utilities/clickhouse-benchmark.md | 58 +++++++------------ 1 file changed, 20 insertions(+), 38 deletions(-) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 1a250ea5481..faa7ac75c74 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -109,56 +109,38 @@ In the report you can find: `clickhouse-benchmark` can compare performances for two running ClickHouse servers. -To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately. +To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown in a table. 
## Example {#clickhouse-benchmark-example} ``` bash -$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10 +$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark --host=localhost --port=9001 --host=localhost --port=9000 -i 10 ``` ``` text Loaded 1 queries. -Queries executed: 6. +Queries executed: 5. -localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459. +localhost:9001, queries 2, QPS: 3.764, RPS: 75446929.370, MiB/s: 575.614, result RPS: 37639659.982, result MiB/s: 287.168. +localhost:9000, queries 3, QPS: 3.815, RPS: 76466659.385, MiB/s: 583.394, result RPS: 38148392.297, result MiB/s: 291.049. -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.159 sec. -30.000% 0.160 sec. -40.000% 0.160 sec. -50.000% 0.162 sec. -60.000% 0.164 sec. -70.000% 0.165 sec. -80.000% 0.166 sec. -90.000% 0.166 sec. -95.000% 0.167 sec. -99.000% 0.167 sec. -99.900% 0.167 sec. -99.990% 0.167 sec. +0.000% 0.258 sec. 0.250 sec. +10.000% 0.258 sec. 0.250 sec. +20.000% 0.258 sec. 0.250 sec. +30.000% 0.258 sec. 0.267 sec. +40.000% 0.258 sec. 0.267 sec. +50.000% 0.273 sec. 0.267 sec. +60.000% 0.273 sec. 0.267 sec. +70.000% 0.273 sec. 0.267 sec. +80.000% 0.273 sec. 0.269 sec. +90.000% 0.273 sec. 0.269 sec. +95.000% 0.273 sec. 0.269 sec. +99.000% 0.273 sec. 0.269 sec. +99.900% 0.273 sec. 0.269 sec. +99.990% 0.273 sec. 0.269 sec. - - -Queries executed: 10. - -localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986. - -0.000% 0.159 sec. -10.000% 0.159 sec. -20.000% 0.160 sec. -30.000% 0.163 sec. -40.000% 0.164 sec. -50.000% 0.165 sec. -60.000% 0.166 sec. -70.000% 0.166 sec. -80.000% 0.167 sec. -90.000% 0.167 sec. -95.000% 0.170 sec. -99.000% 0.172 sec. -99.900% 0.172 sec. -99.990% 0.172 sec. 
+No difference proven at 99.5% confidence ``` [Original article](https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark.md) From e2c23344d14e2a90e1dda9899ee3a41368f50089 Mon Sep 17 00:00:00 2001 From: Camilo Sierra Date: Mon, 7 Nov 2022 09:54:45 +0100 Subject: [PATCH 39/47] full example using AggregatingMergeTree Discussing with a dev the `AggregatingMergeTree` usage was not clear, they do not understand how use the `AggregateFunction` or how we could get the final aggregated value. I hope this full example could better show the capabilities of this feature and help the new users to better understand it. --- .../mergetree-family/aggregatingmergetree.md | 41 ++++++++++++++----- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index ba518f51657..267e5c81dda 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -68,36 +68,57 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} -`AggregatingMergeTree` materialized view that watches the `test.visits` table: +We will create the table `test.visits` that contain the raw data: ``` sql -CREATE MATERIALIZED VIEW test.basic -ENGINE = AggregatingMergeTree() PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate) +CREATE TABLE test.visits + ( + StartDate DateTime64 NOT NULL, + CounterID UInt64, + Sign Nullable(Int32), + UserID Nullable(Int32) +) ENGINE = MergeTree ORDER BY (StartDate, CounterID); +``` + +`AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: + +``` sql +CREATE MATERIALIZED VIEW test.mv_visits +( + StartDate DateTime64 NOT NULL, + CounterID 
UInt64, + Visits AggregateFunction(sum, Nullable(Int32)), + Users AggregateFunction(uniq, Nullable(Int32)) +) +ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID) AS SELECT - CounterID, StartDate, - sumState(Sign) AS Visits, + CounterID, + sumState(Sign) AS Visits, uniqState(UserID) AS Users FROM test.visits -GROUP BY CounterID, StartDate; +GROUP BY StartDate, CounterID; ``` Inserting data into the `test.visits` table. ``` sql -INSERT INTO test.visits ... +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1667446031, 1, 3, 4) +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1667446031, 1, 6, 3) ``` -The data are inserted in both the table and view `test.basic` that will perform the aggregation. +The data are inserted in both the table and the materialized view `test.mv_visits`. -To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the view `test.basic`: +To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: ``` sql SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users -FROM test.basic +FROM test.mv_visits GROUP BY StartDate ORDER BY StartDate; ``` From 940e859eadf0d686952aa27472b7433fbd9c31aa Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 7 Nov 2022 11:40:18 +0100 Subject: [PATCH 40/47] Add debug information to nightly builds --- .github/workflows/nightly.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 612bb1f8f9b..7dff1e205a1 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,6 +10,9 @@ env: workflow_dispatch: jobs: + Debug: + # The task for having a preserved ENV and event.json for later investigation + uses: ./.github/workflows/debug.yml DockerHubPushAarch64: runs-on: [self-hosted, style-checker-aarch64] steps: From 65c86e9ebd8ea03bedeec0422006fb730d7ce5a1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Nov 2022 13:10:18 +0100 Subject: [PATCH 41/47] Add `on: workflow_call` to debug CI --- .github/workflows/debug.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/debug.yml b/.github/workflows/debug.yml index fa980a95a39..993fa8c0d07 100644 --- a/.github/workflows/debug.yml +++ b/.github/workflows/debug.yml @@ -2,7 +2,7 @@ name: Debug 'on': - [push, pull_request, release, workflow_dispatch] + [push, pull_request, release, workflow_dispatch, workflow_call] jobs: DebugInfo: From 105c6a70f00c25d6e04d5ad2ee179a90f3f370a3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Nov 2022 13:19:40 +0100 Subject: [PATCH 42/47] Update GH actions checkers --- docker/test/style/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 683124feaa0..cb8c914e53d 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/style-test . 
FROM ubuntu:20.04 -ARG ACT_VERSION=0.2.25 -ARG ACTIONLINT_VERSION=1.6.8 +ARG ACT_VERSION=0.2.33 +ARG ACTIONLINT_VERSION=1.6.22 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" From e7fbe40b1c112adb985c39eda064134801d2a384 Mon Sep 17 00:00:00 2001 From: Igor Nikonov <954088+devcrafter@users.noreply.github.com> Date: Mon, 7 Nov 2022 14:30:52 +0100 Subject: [PATCH 43/47] Simple fixes for restart replica description --- docs/en/sql-reference/statements/system.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index e9ff4d45c79..2257bbe64f3 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -281,8 +281,8 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name` ### RESTART REPLICA -Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed. -Initialization replication queue based on ZooKeeper date happens in the same way as `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations. +Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of truth and add tasks to Zookeeper queue if needed. +Initialization replication queue based on ZooKeeper date happens in the same way as for `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations. 
``` sql SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name From d6bfbeb95f1ec6ede8e701aa6bfb3220c2e6b6f9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 7 Nov 2022 15:53:43 +0100 Subject: [PATCH 44/47] Fix TSan errors (correctly ignore _exit interception) Because safeExit() does not includes header with defines, it does not know about THREAD_SANITIZER. And it also fixes Azure blob storage, actually everything is fine with the sdk itself, the problem is only in TSan that intercepts _exit() and report leak, even thoug that tread will be joined later. Refs: #23056 (#23616) Fixes: #38474 Closes: #42640 Fixes: #42638 Fixes: #34988 Cc: @alexey-milovidov, @tavplubix Signed-off-by: Azat Khuzhin --- base/base/safeExit.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index ddb93dac65b..2d4e5cf43b4 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -3,6 +3,7 @@ #endif #include #include +#include /// for THREAD_SANITIZER [[noreturn]] void safeExit(int code) { From d446eca8826e1f45c9380d753fc8dba1677e314e Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 7 Nov 2022 11:56:09 -0500 Subject: [PATCH 45/47] move troubleshooting to FAQ --- .../{troubleshooting.md => _troubleshooting.md} | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) rename docs/en/operations/{troubleshooting.md => _troubleshooting.md} (98%) diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/_troubleshooting.md similarity index 98% rename from docs/en/operations/troubleshooting.md rename to docs/en/operations/_troubleshooting.md index ad92e773ea3..aed63ec4d0f 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/_troubleshooting.md @@ -1,9 +1,5 @@ ---- -slug: /en/operations/troubleshooting -sidebar_position: 46 -sidebar_label: Troubleshooting -title: Troubleshooting ---- + +[//]: # (This file is included in FAQ > Troubleshooting) - 
[Installation](#troubleshooting-installation-errors) - [Connecting to the server](#troubleshooting-accepts-no-connections) From b4ecbbf22469cd4a827aec1cd604b3ea900a26e1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 7 Nov 2022 21:07:09 +0300 Subject: [PATCH 46/47] Update safeExit.cpp --- base/base/safeExit.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp index 2d4e5cf43b4..12ad9dc12ee 100644 --- a/base/base/safeExit.cpp +++ b/base/base/safeExit.cpp @@ -1,6 +1,7 @@ #if defined(OS_LINUX) # include #endif +#include #include #include #include /// for THREAD_SANITIZER From 434c0f24f9a342470d702db60a740570597c10f5 Mon Sep 17 00:00:00 2001 From: Igor Nikonov <954088+devcrafter@users.noreply.github.com> Date: Mon, 7 Nov 2022 19:40:59 +0100 Subject: [PATCH 47/47] Update docs/en/sql-reference/statements/system.md Co-authored-by: Alexander Tokmakov --- docs/en/sql-reference/statements/system.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 2257bbe64f3..c8b104ea91f 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -282,7 +282,7 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name` ### RESTART REPLICA Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of truth and add tasks to Zookeeper queue if needed. -Initialization replication queue based on ZooKeeper date happens in the same way as for `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations. +Initialization of replication queue based on ZooKeeper data happens in the same way as for `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations. 
``` sql SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name