diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 0b2b9b9ff62..be87a81772f 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -549,14 +549,26 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
         auto column_size = getColumnSize(part_column.name, *part_column.type);
         if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name))
         {
-            String path_to_data_file = getFullRelativePath() + getFileNameForColumn(part_column) + ".bin";
-            if (!volume->getDisk()->exists(path_to_data_file))
+            String path_to_data_file; /// Left empty unless the lambda below finds an existing, non-empty stream file.
+            part_column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
             {
-                LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but data file {} doesn't exists", name, backQuoteIfNeed(part_column.name), path_to_data_file);
+                if (path_to_data_file.empty())
+                {
+                    String candidate_path = getFullRelativePath() + IDataType::getFileNameForStream(part_column.name, substream_path) + ".bin";
+
+                    /// A .bin file can exist and still be empty (e.g. column_name.dict.null.bin for LowCardinality(Nullable(...)) columns), so a non-zero size is required too.
+                    if (volume->getDisk()->exists(candidate_path) && volume->getDisk()->getFileSize(candidate_path) != 0)
+                        path_to_data_file = candidate_path; /// Later streams are then skipped by the empty() check above.
+                }
+            });
+
+            if (path_to_data_file.empty())
+            {
+                LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but all data files don't exist or empty", name, backQuoteIfNeed(part_column.name));
                 continue;
             }
 
-            result = getCompressionCodecForFile(volume->getDisk(), getFullRelativePath() + getFileNameForColumn(part_column) + ".bin");
+            result = getCompressionCodecForFile(volume->getDisk(), path_to_data_file);
             break;
         }
     }
diff --git a/tests/integration/test_compression_codec_read/__init__.py b/tests/integration/test_compression_codec_read/__init__.py
new file mode 100644
index 00000000000..e5a0d9b4834
--- /dev/null
+++ b/tests/integration/test_compression_codec_read/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_compression_codec_read/test.py b/tests/integration/test_compression_codec_read/test.py
new file mode 100644
index 00000000000..0eb1f5aa867
--- /dev/null
+++ b/tests/integration/test_compression_codec_read/test.py
@@ -0,0 +1,46 @@
+import pytest
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import assert_eq_with_retry
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', image='yandex/clickhouse-server', tag='20.8.11.17', with_installed_binary=True, stay_alive=True)  # pinned to tag 20.8.11.17; restarted with the latest version inside the test
+
+@pytest.fixture(scope="module")
+def start_cluster():  # module-scoped: cluster.start() once, cluster.shutdown() in the finally branch
+    try:
+        cluster.start()
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def test_default_codec_read(start_cluster):  # create and fill the table on the pinned version, then re-check the row count after the restart
+    node1.query("""
+    CREATE TABLE test_18340
+    (
+        `lns` LowCardinality(Nullable(String)),
+        `ns` Nullable(String),
+        `s` String,
+        `ni64` Nullable(Int64),
+        `ui64` UInt64,
+        `alns` Array(LowCardinality(Nullable(String))),
+        `ans` Array(Nullable(String)),
+        `dt` DateTime,
+        `i32` Int32
+    )
+    ENGINE = MergeTree()
+    PARTITION BY i32
+    ORDER BY (s, farmHash64(s))
+    SAMPLE BY farmHash64(s)
+    """)
+
+    node1.query("insert into test_18340 values ('test', 'test', 'test', 0, 0, ['a'], ['a'], now(), 0)")
+
+
+    assert node1.query("SELECT COUNT() FROM test_18340") == "1\n"  # the inserted row is visible before the restart
+
+    node1.restart_with_latest_version()
+
+    assert node1.query("SELECT COUNT() FROM test_18340") == "1\n"  # the same count is expected after the restart