Merge pull request #19101 from ClickHouse/check_compression_codec_read

Fix compression codec read for empty files
This commit is contained in:
alexey-milovidov 2021-01-15 20:55:58 +03:00 committed by GitHub
commit b97beea22a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 4 deletions

View File

@ -549,14 +549,26 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
auto column_size = getColumnSize(part_column.name, *part_column.type);
if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name))
{
String path_to_data_file = getFullRelativePath() + getFileNameForColumn(part_column) + ".bin";
if (!volume->getDisk()->exists(path_to_data_file))
String path_to_data_file;
part_column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
{
LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but data file {} doesn't exists", name, backQuoteIfNeed(part_column.name), path_to_data_file);
if (path_to_data_file.empty())
{
String candidate_path = getFullRelativePath() + IDataType::getFileNameForStream(part_column.name, substream_path) + ".bin";
/// We can have existing, but empty .bin files. Example: LowCardinality(Nullable(...)) columns and column_name.dict.null.bin file.
if (volume->getDisk()->exists(candidate_path) && volume->getDisk()->getFileSize(candidate_path) != 0)
path_to_data_file = candidate_path;
}
});
if (path_to_data_file.empty())
{
LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but all data files don't exist or empty", name, backQuoteIfNeed(part_column.name));
continue;
}
result = getCompressionCodecForFile(volume->getDisk(), getFullRelativePath() + getFileNameForColumn(part_column) + ".bin");
result = getCompressionCodecForFile(volume->getDisk(), path_to_data_file);
break;
}
}

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,46 @@
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
# Cluster object for this test module, constructed from this file's path.
cluster = ClickHouseCluster(__file__)

# node1 is pinned to the 20.8.11.17 server image tag; test_default_codec_read
# later calls restart_with_latest_version() on it.
# NOTE(review): with_installed_binary / stay_alive are presumably what allow
# that restart to swap in a newer binary -- confirm against helpers.cluster.
node1 = cluster.add_instance(
    'node1',
    image='yandex/clickhouse-server',
    tag='20.8.11.17',
    with_installed_binary=True,
    stay_alive=True,
)
@pytest.fixture(scope="module")
def start_cluster():
    """Module-scoped fixture that starts the cluster and yields it to tests.

    The cluster is shut down in the ``finally`` clause, so shutdown is
    attempted whether startup raised or the tests finished.
    """
    try:
        cluster.start()
        yield cluster
    finally:
        # Always attempt shutdown, even if start() or a test raised.
        cluster.shutdown()
def test_default_codec_read(start_cluster):
    """Check that a one-row MergeTree table is still readable after node1 is
    restarted with the latest version.

    The table mixes plain, Nullable, LowCardinality(Nullable(...)) and Array
    columns. One row is inserted and counted, node1 is restarted via
    ``restart_with_latest_version()``, and the row is counted again.
    """
    create_table_sql = """
CREATE TABLE test_18340
(
`lns` LowCardinality(Nullable(String)),
`ns` Nullable(String),
`s` String,
`ni64` Nullable(Int64),
`ui64` UInt64,
`alns` Array(LowCardinality(Nullable(String))),
`ans` Array(Nullable(String)),
`dt` DateTime,
`i32` Int32
)
ENGINE = MergeTree()
PARTITION BY i32
ORDER BY (s, farmHash64(s))
SAMPLE BY farmHash64(s)
"""
    insert_row_sql = "insert into test_18340 values ('test', 'test', 'test', 0, 0, ['a'], ['a'], now(), 0)"
    count_rows_sql = "SELECT COUNT() FROM test_18340"
    single_row = "1\n"

    # Create the table and write exactly one row on the pinned server version.
    node1.query(create_table_sql)
    node1.query(insert_row_sql)
    assert node1.query(count_rows_sql) == single_row

    # After switching to the latest version the same row must still be counted.
    node1.restart_with_latest_version()
    assert node1.query(count_rows_sql) == single_row