From 967a47d870a11e8c8a07e36b82e9245c3517aa3a Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 28 Feb 2023 12:17:43 +0100
Subject: [PATCH 1/2] Fix bug with readonly storages

---
 .../MergeTree/DataPartStorageOnDiskBase.cpp   |  5 ++
 .../MergeTree/DataPartStorageOnDiskBase.h     |  1 +
 src/Storages/MergeTree/IDataPartStorage.h     |  1 +
 src/Storages/MergeTree/IMergeTreeDataPart.cpp | 14 +++--
 .../test_disk_over_web_server/test.py         | 56 ++++++++++++++-----
 5 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 260bf524ed2..c95a9b7f6f4 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -213,6 +213,11 @@ bool DataPartStorageOnDiskBase::isBroken() const
     return volume->getDisk()->isBroken();
 }

+bool DataPartStorageOnDiskBase::isReadonly() const
+{
+    return volume->getDisk()->isReadOnly();
+}
+
 void DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const
 {
     volume->getDisk()->syncRevision(revision);
diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
index 7c408dcf381..52544bb2457 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h
@@ -39,6 +39,7 @@ public:
     bool supportZeroCopyReplication() const override;
     bool supportParallelWrite() const override;
     bool isBroken() const override;
+    bool isReadonly() const override;
     void syncRevision(UInt64 revision) const override;
     UInt64 getRevision() const override;
     std::string getDiskPath() const override;
diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h
index f92784cb0da..065d4d42dcb 100644
--- a/src/Storages/MergeTree/IDataPartStorage.h
+++ b/src/Storages/MergeTree/IDataPartStorage.h
@@ -151,6 +151,7 @@ public:
     virtual bool supportZeroCopyReplication() const { return false; }
     virtual bool supportParallelWrite() const = 0;
     virtual bool isBroken() const = 0;
+    virtual bool isReadonly() const = 0;

     /// TODO: remove or at least remove const.
     virtual void syncRevision(UInt64 revision) const = 0;
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 671c6ca67c1..a03a2619993 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1304,6 +1304,8 @@ void IMergeTreeDataPart::loadColumns(bool require)
         metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
     NamesAndTypesList loaded_columns;

+    bool is_readonly_storage = getDataPartStorage().isReadonly();
+
     if (!metadata_manager->exists("columns.txt"))
     {
         /// We can get list of columns only from columns.txt in compact parts.
@@ -1319,7 +1321,8 @@ void IMergeTreeDataPart::loadColumns(bool require)
         if (columns.empty())
             throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name);

-        writeColumns(loaded_columns, {});
+        if (!is_readonly_storage)
+            writeColumns(loaded_columns, {});
     }
     else
     {
@@ -1353,10 +1356,13 @@ void IMergeTreeDataPart::loadColumns(bool require)
     {
         loaded_metadata_version = metadata_snapshot->getMetadataVersion();

-        writeMetadata(METADATA_VERSION_FILE_NAME, {}, [loaded_metadata_version](auto & buffer)
+        if (!is_readonly_storage)
         {
-            writeIntText(loaded_metadata_version, buffer);
-        });
+            writeMetadata(METADATA_VERSION_FILE_NAME, {}, [loaded_metadata_version](auto & buffer)
+            {
+                writeIntText(loaded_metadata_version, buffer);
+            });
+        }
     }

     setColumns(loaded_columns, infos, loaded_metadata_version);
diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py
index 363df4595b2..2991ece05ec 100644
--- a/tests/integration/test_disk_over_web_server/test.py
+++ b/tests/integration/test_disk_over_web_server/test.py
@@ -21,23 +21,27 @@ def cluster():
         cluster.add_instance(
             "node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True
         )
+
+        cluster.add_instance(
+            "node4", main_configs=["configs/storage_conf.xml"],
+            with_nginx=True, stay_alive=True, with_installed_binary=True,
+            image="clickhouse/clickhouse-server", tag="22.8.14.53",
+        )
+
         cluster.start()

-        node1 = cluster.instances["node1"]
-        expected = ""
-        global uuids
-        for i in range(3):
-            node1.query(
+        def create_table_and_upload_data(node, i):
+            node.query(
                 f"CREATE TABLE data{i} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def', min_bytes_for_wide_part=1;"
             )

             for _ in range(10):
-                node1.query(
+                node.query(
                     f"INSERT INTO data{i} SELECT number FROM numbers(500000 * {i+1})"
                 )
-            expected = node1.query(f"SELECT * FROM data{i} ORDER BY id")
+            node.query(f"SELECT * FROM data{i} ORDER BY id")

-            metadata_path = node1.query(
+            metadata_path = node.query(
                 f"SELECT data_paths FROM system.tables WHERE name='data{i}'"
             )
             metadata_path = metadata_path[
@@ -45,7 +49,7 @@ def cluster():
             ]
             print(f"Metadata: {metadata_path}")

-            node1.exec_in_container(
+            node.exec_in_container(
                 [
                     "bash",
                     "-c",
@@ -56,8 +60,21 @@ def cluster():
                 user="root",
             )
             parts = metadata_path.split("/")
-            uuids.append(parts[3])
             print(f"UUID: {parts[3]}")
+            return parts[3]
+
+
+        node1 = cluster.instances["node1"]
+
+        global uuids
+        for i in range(2):
+            uuid = create_table_and_upload_data(node1, i)
+            uuids.append(uuid)
+
+        node4 = cluster.instances["node4"]
+
+        uuid = create_table_and_upload_data(node4, 2)
+        uuids.append(uuid)

         yield cluster

@@ -68,6 +85,7 @@ def cluster():
 @pytest.mark.parametrize("node_name", ["node2"])
 def test_usage(cluster, node_name):
     node1 = cluster.instances["node1"]
+    node4 = cluster.instances["node4"]
     node2 = cluster.instances[node_name]
     global uuids
     assert len(uuids) == 3
@@ -90,7 +108,11 @@ def test_usage(cluster, node_name):
         result = node2.query(
             "SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)
         )
-        assert result == node1.query(
+        node = node1
+        if i == 2:
+            node = node4
+
+        assert result == node.query(
             "SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i)
         )

@@ -99,7 +121,7 @@ def test_usage(cluster, node_name):
         result = node2.query(
             "SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                 i
             )
         )
-        assert result == node1.query(
+        assert result == node.query(
             "SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                 i
             )
         )
@@ -141,6 +163,7 @@ def test_incorrect_usage(cluster):
 @pytest.mark.parametrize("node_name", ["node2"])
 def test_cache(cluster, node_name):
     node1 = cluster.instances["node1"]
+    node4 = cluster.instances["node4"]
     node2 = cluster.instances[node_name]
     global uuids
     assert len(uuids) == 3
@@ -178,7 +201,12 @@ def test_cache(cluster, node_name):
         result = node2.query(
             "SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)
         )
-        assert result == node1.query(
+
+        node = node1
+        if i == 2:
+            node = node4
+
+        assert result == node.query(
             "SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i)
         )

@@ -187,7 +215,7 @@ def test_cache(cluster, node_name):
         result = node2.query(
             "SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                 i
             )
         )
-        assert result == node1.query(
+        assert result == node.query(
             "SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(
                 i
             )
         )

From 979c62af4fe4e7932c8fd05945ca5ee1987b9005 Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Tue, 28 Feb 2023 11:24:39 +0000
Subject: [PATCH 2/2] Automatic style fix

---
 tests/integration/test_disk_over_web_server/test.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py
index 2991ece05ec..fd71389f71a 100644
--- a/tests/integration/test_disk_over_web_server/test.py
+++ b/tests/integration/test_disk_over_web_server/test.py
@@ -23,9 +23,13 @@ def cluster():
         )

         cluster.add_instance(
-            "node4", main_configs=["configs/storage_conf.xml"],
-            with_nginx=True, stay_alive=True, with_installed_binary=True,
-            image="clickhouse/clickhouse-server", tag="22.8.14.53",
+            "node4",
+            main_configs=["configs/storage_conf.xml"],
+            with_nginx=True,
+            stay_alive=True,
+            with_installed_binary=True,
+            image="clickhouse/clickhouse-server",
+            tag="22.8.14.53",
         )

         cluster.start()
@@ -63,7 +67,6 @@ def cluster():
             print(f"UUID: {parts[3]}")
             return parts[3]

-
         node1 = cluster.instances["node1"]

         global uuids