From 13c5c621c1f7c49763773a278893b932806c5f35 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 18 Nov 2019 00:41:40 +0300 Subject: [PATCH 01/12] Attempt to ignore redundant copies of parts after move and restart. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 2843ff14d79..f6a901c2cb2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -760,6 +760,12 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; + if (Poco::Path(it.path(), "delete-on-destroy.txt").isFile()) + { + it->remove(true); + continue; + } + part_names_with_disks.emplace_back(it.name(), disk_ptr); } } @@ -2528,6 +2534,16 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) auto part_it = data_parts_indexes.insert(part_copy).first; modifyPartState(part_it, DataPartState::Committed); + + try + { + Poco::File(original_active_part->getFullPath() + "/delete-on-destroy.txt").createFile(); + } + catch (...) + { + LOG_WARNING(log, "Exception has occurred while creating DeleteOnDestroy marker: '" + << original_active_part->getFullPath() + "/delete-on-destroy.txt'."); + } return; } } From efa73608b8a09f821bf225ee5f708f1edae24e04 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 18 Nov 2019 11:42:46 +0300 Subject: [PATCH 02/12] Separated constant path to DeleteOnDestroy marker. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index f6a901c2cb2..00762cc4254 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -96,6 +96,12 @@ namespace ErrorCodes } +namespace +{ + const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt"; +} + + MergeTreeData::MergeTreeData( const String & database_, const String & table_, @@ -760,7 +766,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; - if (Poco::Path(it.path(), "delete-on-destroy.txt").isFile()) + if (Poco::Path(it.path(), DELETE_ON_DESTROY_MARKER_PATH).isFile()) { it->remove(true); continue; @@ -2535,14 +2541,15 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) auto part_it = data_parts_indexes.insert(part_copy).first; modifyPartState(part_it, DataPartState::Committed); + Poco::Path marker_path(Poco::Path(original_active_part->getFullPath()), DELETE_ON_DESTROY_MARKER_PATH); try { - Poco::File(original_active_part->getFullPath() + "/delete-on-destroy.txt").createFile(); + Poco::File(marker_path).createFile(); } catch (...) { LOG_WARNING(log, "Exception has occurred while creating DeleteOnDestroy marker: '" - << original_active_part->getFullPath() + "/delete-on-destroy.txt'."); + << marker_path.toString() + "'."); } return; } From 0ab67c7b1f82084fc37005478d12f7e2fe1429ab Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Tue, 19 Nov 2019 10:38:17 +0100 Subject: [PATCH 03/12] Log when part is removed after move --- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 132aebb70db..dff9d177f87 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -346,6 +346,11 @@ MergeTreeDataPart::~MergeTreeDataPart() } dir.remove(true); + + if (state == State::DeleteOnDestroy) + { + LOG_TRACE(storage.log, "Removed part from old location " << path ); + } } catch (...) { From f545da40e69c3eaf8d671a9fb94b677658b28a68 Mon Sep 17 00:00:00 2001 From: filimonov <1549571+filimonov@users.noreply.github.com> Date: Tue, 19 Nov 2019 11:08:08 +0100 Subject: [PATCH 04/12] style --- dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index dff9d177f87..ce893dfe666 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -347,10 +347,10 @@ MergeTreeDataPart::~MergeTreeDataPart() dir.remove(true); - if (state == State::DeleteOnDestroy) + if (state == State::DeleteOnDestroy) { - LOG_TRACE(storage.log, "Removed part from old location " << path ); - } + LOG_TRACE(storage.log, "Removed part from old location " << path); + } } catch (...) { From fe3e89fb3876641e48ee24989cc62cd7d0fd3c0e Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 19 Nov 2019 16:11:23 +0300 Subject: [PATCH 05/12] Fixed tests. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 00762cc4254..ccf1b9b1b38 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -766,8 +766,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; - if (Poco::Path(it.path(), DELETE_ON_DESTROY_MARKER_PATH).isFile()) - { + if (Poco::Path(Poco::Path(it.path()), DELETE_ON_DESTROY_MARKER_PATH).isFile()) it->remove(true); continue; } From 89659a9129aa2e7fdaa64e999d51bf8d9e42eb22 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 19 Nov 2019 16:32:13 +0300 Subject: [PATCH 06/12] Fixed a typo. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index ccf1b9b1b38..84d7133698f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -767,6 +767,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) continue; if (Poco::Path(Poco::Path(it.path()), DELETE_ON_DESTROY_MARKER_PATH).isFile()) + { it->remove(true); continue; } From c283776e846128f5589705ff3a1645785c1b1938 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Tue, 19 Nov 2019 16:40:36 +0300 Subject: [PATCH 07/12] Really fixed tests. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 84d7133698f..35d45dea493 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -766,9 +766,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; - if (Poco::Path(Poco::Path(it.path()), DELETE_ON_DESTROY_MARKER_PATH).isFile()) - { - it->remove(true); + if (Poco::Path(it.path(), DELETE_ON_DESTROY_MARKER_PATH).isFile()) + { + Poco::File(it.path()).remove(true); continue; } From 374e0f7bb5b83b98675c9c820edb040b1c276737 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 20 Nov 2019 08:33:19 +0300 Subject: [PATCH 08/12] Finally fixed tests. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 35d45dea493..27e89b34b41 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -766,7 +766,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; - if (Poco::Path(it.path(), DELETE_ON_DESTROY_MARKER_PATH).isFile()) + Poco::Path marker_path(it.path(), DELETE_ON_DESTROY_MARKER_PATH); + if (Poco::File(marker_path).exists()) { Poco::File(it.path()).remove(true); continue; From 9cf7ef03af82f9b39cc9289aab4327a2cbf1119b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Wed, 4 Dec 2019 13:47:10 +0300 Subject: [PATCH 09/12] Fixed handling of `MergeTreeData::DataPartPtr`'s in `MergeTreeData::swapActivePart()`. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 27e89b34b41..157b682f544 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2528,7 +2528,7 @@ MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) { auto lock = lockParts(); - for (const auto & original_active_part : getDataPartsStateRange(DataPartState::Committed)) + for (auto original_active_part : getDataPartsStateRange(DataPartState::Committed)) { if (part_copy->name == original_active_part->name) { From a2f238d8daf2ad9518cf5bafbade22889e58a11b Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 9 Dec 2019 16:44:11 +0300 Subject: [PATCH 10/12] Added more logging and switched to detaching stale part instead of removing. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 157b682f544..1b8a402d01f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -766,13 +766,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (startsWith(it.name(), "tmp")) continue; - Poco::Path marker_path(it.path(), DELETE_ON_DESTROY_MARKER_PATH); - if (Poco::File(marker_path).exists()) - { - Poco::File(it.path()).remove(true); - continue; - } - part_names_with_disks.emplace_back(it.name(), disk_ptr); } } @@ -814,6 +807,16 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) part->relative_path = part_name; bool broken = false; + Poco::Path part_path(getFullPathOnDisk(part_disk_ptr), part_name); + Poco::Path marker_path(part_path, DELETE_ON_DESTROY_MARKER_PATH); + if (Poco::File(marker_path).exists()) + { + LOG_WARNING(log, "Detaching stale part " << getFullPathOnDisk(part_disk_ptr) << part_name << ", which should have been deleted after a move. That can only happen after unclean restart of ClickHouse."); + std::lock_guard loading_lock(mutex); + broken_parts_to_detach.push_back(part); + ++suspicious_broken_parts; + } + try { part->loadColumnsChecksumsIndexes(require_part_metadata, true); @@ -2547,10 +2550,9 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) { Poco::File(marker_path).createFile(); } - catch (...) + catch (Poco::Exception & e) { - LOG_WARNING(log, "Exception has occurred while creating DeleteOnDestroy marker: '" - << marker_path.toString() + "'."); + LOG_ERROR(log, e.what() << " (while creating DeleteOnDestroy marker: " + backQuote(marker_path.toString()) + ")"); } return; } From 6afd8d78055c6b5a18cb75ce910246aae09ed747 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 9 Dec 2019 19:20:56 +0300 Subject: [PATCH 11/12] Added `test_multiple_disk::test_kill_while_insert`, reworked log message and fixed logic of treating stale copies. --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 3 +- dbms/tests/integration/helpers/cluster.py | 4 +- .../integration/test_multiple_disks/test.py | 50 ++++++++++++++++++- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 1b8a402d01f..ba121248406 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -811,10 +811,11 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) Poco::Path marker_path(part_path, DELETE_ON_DESTROY_MARKER_PATH); if (Poco::File(marker_path).exists()) { - LOG_WARNING(log, "Detaching stale part " << getFullPathOnDisk(part_disk_ptr) << part_name << ", which should have been deleted after a move. That can only happen after unclean restart of ClickHouse."); + LOG_WARNING(log, "Detaching stale part " << getFullPathOnDisk(part_disk_ptr) << part_name << ", which should have been deleted after a move. That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part."); std::lock_guard loading_lock(mutex); broken_parts_to_detach.push_back(part); ++suspicious_broken_parts; + return; } try diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index b2620cd01f9..6463667435e 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -628,11 +628,11 @@ class ClickHouseInstance: def http_query(self, sql, data=None): return urllib.urlopen("http://" + self.ip_address + ":8123/?query=" + urllib.quote(sql, safe=''), data).read() - def restart_clickhouse(self, stop_start_wait_sec=5): + def restart_clickhouse(self, stop_start_wait_sec=5, kill=False): if not self.stay_alive: raise Exception("clickhouse can be restarted only with stay_alive=True instance") - self.exec_in_container(["bash", "-c", "pkill clickhouse"], user='root') + self.exec_in_container(["bash", "-c", "pkill {} clickhouse".format("-9" if kill else "")], user='root') time.sleep(stop_start_wait_sec) self.exec_in_container(["bash", "-c", "{} --daemon".format(CLICKHOUSE_START_COMMAND)], user=str(os.getuid())) diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index 38ed618845f..e508239e2e5 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -3,6 +3,7 @@ import pytest import random import re import string +import threading import time from multiprocessing.dummy import Pool from helpers.client import QueryRuntimeException @@ -15,6 +16,7 @@ node1 = cluster.add_instance('node1', config_dir='configs', main_configs=['configs/logs_config.xml'], with_zookeeper=True, + stay_alive=True, tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'], macros={"shard": 0, "replica": 1} ) @@ -22,6 +24,7 @@ node2 = cluster.add_instance('node2', config_dir='configs', main_configs=['configs/logs_config.xml'], with_zookeeper=True, + stay_alive=True, tmpfs=['/jbod1:size=40M', '/jbod2:size=40M', '/external:size=200M'], macros={"shard": 0, "replica": 2} ) @@ -1028,6 +1031,7 @@ def test_rename(start_cluster): node1.query("DROP TABLE IF EXISTS default.renaming_table1") node1.query("DROP TABLE IF EXISTS test.renaming_table2") + def test_freeze(start_cluster): try: node1.query(""" @@ -1057,6 +1061,50 @@ def test_freeze(start_cluster): node1.exec_in_container(["bash", "-c", "find /jbod1/shadow -name '*.mrk2' | grep '.*'"]) node1.exec_in_container(["bash", "-c", "find /external/shadow -name '*.mrk2' | grep '.*'"]) - finally: node1.query("DROP TABLE IF EXISTS default.freezing_table") + + +def test_kill_while_insert(start_cluster): + try: + name = "test_kill_while_merge" + + node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) + + node1.query(""" + CREATE TABLE {name} ( + s String + ) ENGINE = MergeTree + ORDER BY tuple() + SETTINGS storage_policy='small_jbod_with_external' + """.format(name=name)) + + data = [] + dates = [] + for i in range(10): + data.append(get_random_string(1024 * 1024)) # 1MB value + node1.query("INSERT INTO {name} VALUES {}".format(','.join(["('" + s + "')" for s in data]), name=name)) + + disks = get_used_disks_for_table(node1, name) + assert set(disks) == {"jbod1"} + + start_time = time.time() + long_select = threading.Thread(target=node1.query, args=("SELECT sleep(3) FROM {name}".format(name=name),)) + long_select.start() + + time.sleep(0.5) + + node1.query("ALTER TABLE {name} MOVE PARTITION tuple() TO DISK 'external'".format(name=name)) + assert time.time() - start_time < 2 + node1.restart_clickhouse(kill=True) + + try: + long_select.join() + except: + """""" + + time.sleep(0.5) + assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["10"] + + finally: + """Don't drop table afterwards to not shadow assertion.""" From ea44510b44cc5b31c0266e2a6fe064d846f40324 Mon Sep 17 00:00:00 2001 From: Vladimir Chebotarev Date: Mon, 9 Dec 2019 20:53:52 +0300 Subject: [PATCH 12/12] Minor fix of test. --- dbms/tests/integration/test_multiple_disks/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_multiple_disks/test.py b/dbms/tests/integration/test_multiple_disks/test.py index e508239e2e5..57f54fe1edc 100644 --- a/dbms/tests/integration/test_multiple_disks/test.py +++ b/dbms/tests/integration/test_multiple_disks/test.py @@ -1067,7 +1067,7 @@ def test_freeze(start_cluster): def test_kill_while_insert(start_cluster): try: - name = "test_kill_while_merge" + name = "test_kill_while_insert" node1.query("DROP TABLE IF EXISTS {name}".format(name=name))