A better alternative to #46344

The (experimental) inverted index writes/reads files different from the
standard files written by the other skip indexes. The original problem
was that with database engine "ordinary", DROP TABLE of a table with
inverted index finds unknown files in persistence and complains. The
same will happen with engine "atomic" but deferred. As a hotfix, the
error was silenced by explicitly adding the four files created in a
specific test to the deletion code.

This PR tries a cleaner solution where all needed files are provided via
the normal checksum structure. One drawback remains which is that the
affected files were written earlier and we don't have their checksums
available. Therefore, the inverted index is currently excluded from
CHECK TABLE.

Minimal repro:
  SET allow_experimental_inverted_index = 1;
  DROP TABLE IF EXISTS tab;
  CREATE TABLE tab(s String, INDEX af(s) TYPE inverted(2)) ENGINE = MergeTree() ORDER BY s;
  INSERT INTO tab VALUES ('Alick a01');
  CHECK TABLE tab;
  DROP TABLE IF EXISTS tab;

  run ./clickhouse-test with --db-engine Ordinary
This commit is contained in:
Robert Schulze 2023-02-26 20:01:35 +00:00
parent b5438456aa
commit cc0c0c6133
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
6 changed files with 30 additions and 12 deletions

View File

@ -684,12 +684,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true);
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
/// Inverted index
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true);
disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
disk->removeDirectory(dir);
}

View File

@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
{
const String & name = it.first;
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid"))
continue;
auto jt = rhs.files.find(name);
if (jt == rhs.files.end())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);

View File

@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
for (const auto & index_helper : skip_indices)
for (const auto & skip_index : skip_indices)
{
String stream_name = index_helper->getFileName();
String stream_name = skip_index->getFileName();
skip_indices_streams.emplace_back(
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
stream_name,
data_part->getDataPartStoragePtr(),
stream_name, index_helper->getSerializedFileExtension(),
stream_name, skip_index->getSerializedFileExtension(),
stream_name, marks_file_extension,
default_codec, settings.max_compress_block_size,
marks_compression_codec, settings.marks_compress_block_size,
settings.query_write_settings));
GinIndexStorePtr store = nullptr;
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
if (dynamic_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
{
store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store;
}
skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store));
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store));
skip_index_accumulated_marks.push_back(0);
}
}
@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
auto & stream = *skip_indices_streams[i];
if (!skip_indices_aggregators[i]->empty())
skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing);
/// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
/// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and
/// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
if (dynamic_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
{
String filename_without_extension = skip_indices[i]->getFileName();
checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum();
}
}
for (auto & stream : skip_indices_streams)

View File

@ -163,12 +163,16 @@ IMergeTreeDataPart::Checksums checkDataPart(
auto file_name = it->name();
/// We will check projections later.
if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj"))
if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
{
projections_on_disk.insert(file_name);
continue;
}
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid"))
continue;
auto checksum_it = checksums_data.files.find(file_name);
/// Skip files that we already calculated. Also skip metadata files that are not checksummed.

View File

@ -1,4 +1,5 @@
af inverted
1
101 Alick a01
1
101 Alick a01

View File

@ -15,6 +15,9 @@ INSERT INTO tab VALUES (101, 'Alick a01'), (102, 'Blick a02'), (103, 'Click a03'
-- check inverted index was created
SELECT name, type FROM system.data_skipping_indices WHERE table =='tab' AND database = currentDatabase() LIMIT 1;
-- throw in a random consistency check
CHECK TABLE tab;
-- search inverted index with ==
SELECT * FROM tab WHERE s == 'Alick a01';