From 0ae422d33c0538f759c64444cc8b7a6f8f2a5b63 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 23 Apr 2024 14:11:12 +0000 Subject: [PATCH] Rename MergeTreeIndexFullText --> MergeTreeIndexBloomFilterText --- src/Interpreters/GinFilter.cpp | 2 +- .../MergeTree/MergeTreeIndexBloomFilter.cpp | 4 +- ....cpp => MergeTreeIndexBloomFilterText.cpp} | 56 +++++++++---------- ...Text.h => MergeTreeIndexBloomFilterText.h} | 28 +++++----- .../MergeTree/MergeTreeIndexInverted.h | 1 - src/Storages/MergeTree/MergeTreeIndices.cpp | 12 ++-- src/Storages/MergeTree/MergeTreeIndices.h | 6 +- .../tests/gtest_SplitTokenExtractor.cpp | 2 +- 8 files changed, 55 insertions(+), 56 deletions(-) rename src/Storages/MergeTree/{MergeTreeIndexFullText.cpp => MergeTreeIndexBloomFilterText.cpp} (92%) rename src/Storages/MergeTree/{MergeTreeIndexFullText.h => MergeTreeIndexBloomFilterText.h} (84%) diff --git a/src/Interpreters/GinFilter.cpp b/src/Interpreters/GinFilter.cpp index 5d823318313..1ce26ed1fd1 100644 --- a/src/Interpreters/GinFilter.cpp +++ b/src/Interpreters/GinFilter.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index ed091022a91..4f25a014382 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -921,7 +921,7 @@ static void assertIndexColumnsType(const Block & header) } } -MergeTreeIndexPtr bloomFilterIndexCreatorNew( +MergeTreeIndexPtr bloomFilterIndexCreator( const IndexDescription & index) { double max_conflict_probability = 0.025; @@ -938,7 +938,7 @@ MergeTreeIndexPtr bloomFilterIndexCreatorNew( index, bits_per_row_and_size_of_hash_functions.first, bits_per_row_and_size_of_hash_functions.second); } -void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach) +void bloomFilterIndexValidator(const IndexDescription & index, bool attach) { assertIndexColumnsType(index.sample_block); diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp similarity index 92% rename from src/Storages/MergeTree/MergeTreeIndexFullText.cpp rename to src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp index 4cd616513ac..826b149cf01 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -32,7 +32,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -MergeTreeIndexGranuleFullText::MergeTreeIndexGranuleFullText( +MergeTreeIndexGranuleBloomFilterText::MergeTreeIndexGranuleBloomFilterText( const String & index_name_, size_t columns_number, const BloomFilterParameters & params_) @@ -44,7 +44,7 @@ MergeTreeIndexGranuleFullText::MergeTreeIndexGranuleFullText( { } -void MergeTreeIndexGranuleFullText::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleBloomFilterText::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty fulltext index {}.", backQuote(index_name)); @@ -53,7 +53,7 @@ void MergeTreeIndexGranuleFullText::serializeBinary(WriteBuffer & ostr) const ostr.write(reinterpret_cast(bloom_filter.getFilter().data()), params.filter_size); } -void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) +void MergeTreeIndexGranuleBloomFilterText::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) { if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); @@ -66,7 +66,7 @@ void MergeTreeIndexGranuleFullText::deserializeBinary(ReadBuffer & istr, MergeTr } -MergeTreeIndexAggregatorFullText::MergeTreeIndexAggregatorFullText( +MergeTreeIndexAggregatorBloomFilterText::MergeTreeIndexAggregatorBloomFilterText( const Names & index_columns_, const String & index_name_, const BloomFilterParameters & params_, @@ -76,20 +76,20 @@ MergeTreeIndexAggregatorFullText::MergeTreeIndexAggregatorFullText( , params(params_) , token_extractor(token_extractor_) , granule( - std::make_shared( + std::make_shared( index_name, index_columns.size(), params)) { } -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilterText::getGranuleAndReset() { - auto new_granule = std::make_shared( + auto new_granule = std::make_shared( index_name, index_columns.size(), params); new_granule.swap(granule); return new_granule; } -void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorBloomFilterText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " @@ -137,7 +137,7 @@ void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, *pos += rows_read; } -MergeTreeConditionFullText::MergeTreeConditionFullText( +MergeTreeConditionBloomFilterText::MergeTreeConditionBloomFilterText( const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const Block & index_sample_block, @@ -162,7 +162,7 @@ MergeTreeConditionFullText::MergeTreeConditionFullText( } /// Keep in-sync with MergeTreeConditionGinFilter::alwaysUnknownOrTrue -bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const +bool MergeTreeConditionBloomFilterText::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. std::vector rpn_stack; @@ -212,10 +212,10 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const } /// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart -bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +bool MergeTreeConditionBloomFilterText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) throw Exception(ErrorCodes::LOGICAL_ERROR, "BloomFilter index condition got a granule with the wrong type."); @@ -323,13 +323,13 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx return rpn_stack[0].can_be_true; } -std::optional MergeTreeConditionFullText::getKeyIndex(const std::string & key_column_name) +std::optional MergeTreeConditionBloomFilterText::getKeyIndex(const std::string & key_column_name) { const auto it = std::ranges::find(index_columns, key_column_name); return it == index_columns.end() ? std::nullopt : std::make_optional(std::ranges::distance(index_columns.cbegin(), it)); } -bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out) +bool MergeTreeConditionBloomFilterText::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNElement & out) { { Field const_value; @@ -419,7 +419,7 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode & return false; } -bool MergeTreeConditionFullText::traverseTreeEquals( +bool MergeTreeConditionBloomFilterText::traverseTreeEquals( const String & function_name, const RPNBuilderTreeNode & key_node, const DataTypePtr & value_type, @@ -638,7 +638,7 @@ bool MergeTreeConditionFullText::traverseTreeEquals( return false; } -bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( +bool MergeTreeConditionBloomFilterText::tryPrepareSetBloomFilter( const RPNBuilderTreeNode & left_argument, const RPNBuilderTreeNode & right_argument, RPNElement & out) @@ -714,23 +714,23 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( return true; } -MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexBloomFilterText::createIndexGranule() const { - return std::make_shared(index.name, index.column_names.size(), params); + return std::make_shared(index.name, index.column_names.size(), params); } -MergeTreeIndexAggregatorPtr MergeTreeIndexFullText::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const +MergeTreeIndexAggregatorPtr MergeTreeIndexBloomFilterText::createIndexAggregator(const MergeTreeWriterSettings & /*settings*/) const { - return std::make_shared(index.column_names, index.name, params, token_extractor.get()); + return std::make_shared(index.column_names, index.name, params, token_extractor.get()); } -MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexBloomFilterText::createIndexCondition( const ActionsDAGPtr & filter_dag, ContextPtr context) const { - return std::make_shared(filter_dag, context, index.sample_block, params, token_extractor.get()); + return std::make_shared(filter_dag, context, index.sample_block, params, token_extractor.get()); } -MergeTreeIndexPtr bloomFilterIndexCreator( +MergeTreeIndexPtr bloomFilterIndexTextCreator( const IndexDescription & index) { if (index.type == NgramTokenExtractor::getName()) @@ -743,7 +743,7 @@ MergeTreeIndexPtr bloomFilterIndexCreator( auto tokenizer = std::make_unique(n); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } else if (index.type == SplitTokenExtractor::getName()) { @@ -754,7 +754,7 @@ MergeTreeIndexPtr bloomFilterIndexCreator( auto tokenizer = std::make_unique(); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } else { @@ -762,7 +762,7 @@ MergeTreeIndexPtr bloomFilterIndexCreator( } } -void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) +void bloomFilterIndexTextValidator(const IndexDescription & index, bool /*attach*/) { for (const auto & index_data_type : index.data_types) { diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h similarity index 84% rename from src/Storages/MergeTree/MergeTreeIndexFullText.h rename to src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h index e66f498ce1d..6fd969030df 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilterText.h @@ -11,14 +11,14 @@ namespace DB { -struct MergeTreeIndexGranuleFullText final : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleBloomFilterText final : public IMergeTreeIndexGranule { - explicit MergeTreeIndexGranuleFullText( + explicit MergeTreeIndexGranuleBloomFilterText( const String & index_name_, size_t columns_number, const BloomFilterParameters & params_); - ~MergeTreeIndexGranuleFullText() override = default; + ~MergeTreeIndexGranuleBloomFilterText() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -32,17 +32,17 @@ struct MergeTreeIndexGranuleFullText final : public IMergeTreeIndexGranule bool has_elems; }; -using MergeTreeIndexGranuleFullTextPtr = std::shared_ptr; +using MergeTreeIndexGranuleBloomFilterTextPtr = std::shared_ptr; -struct MergeTreeIndexAggregatorFullText final : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorBloomFilterText final : IMergeTreeIndexAggregator { - explicit MergeTreeIndexAggregatorFullText( + explicit MergeTreeIndexAggregatorBloomFilterText( const Names & index_columns_, const String & index_name_, const BloomFilterParameters & params_, TokenExtractorPtr token_extractor_); - ~MergeTreeIndexAggregatorFullText() override = default; + ~MergeTreeIndexAggregatorBloomFilterText() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; @@ -54,21 +54,21 @@ struct MergeTreeIndexAggregatorFullText final : IMergeTreeIndexAggregator BloomFilterParameters params; TokenExtractorPtr token_extractor; - MergeTreeIndexGranuleFullTextPtr granule; + MergeTreeIndexGranuleBloomFilterTextPtr granule; }; -class MergeTreeConditionFullText final : public IMergeTreeIndexCondition +class MergeTreeConditionBloomFilterText final : public IMergeTreeIndexCondition { public: - MergeTreeConditionFullText( + MergeTreeConditionBloomFilterText( const ActionsDAGPtr & filter_actions_dag, ContextPtr context, const Block & index_sample_block, const BloomFilterParameters & params_, TokenExtractorPtr token_extactor_); - ~MergeTreeConditionFullText() override = default; + ~MergeTreeConditionBloomFilterText() override = default; bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; @@ -146,10 +146,10 @@ private: RPN rpn; }; -class MergeTreeIndexFullText final : public IMergeTreeIndex +class MergeTreeIndexBloomFilterText final : public IMergeTreeIndex { public: - MergeTreeIndexFullText( + MergeTreeIndexBloomFilterText( const IndexDescription & index_, const BloomFilterParameters & params_, std::unique_ptr && token_extractor_) @@ -157,7 +157,7 @@ public: , params(params_) , token_extractor(std::move(token_extractor_)) {} - ~MergeTreeIndexFullText() override = default; + ~MergeTreeIndexBloomFilterText() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator(const MergeTreeWriterSettings & settings) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexInverted.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h index f3c1f37e364..bab4e122aa6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexInverted.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -5,7 +5,6 @@ #include #include #include -#include #include namespace DB diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index 322cdd35afe..be8b4c795f0 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -115,14 +115,14 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("set", setIndexCreator); registerValidator("set", setIndexValidator); - registerCreator("ngrambf_v1", bloomFilterIndexCreator); - registerValidator("ngrambf_v1", bloomFilterIndexValidator); + registerCreator("ngrambf_v1", bloomFilterIndexTextCreator); + registerValidator("ngrambf_v1", bloomFilterIndexTextValidator); - registerCreator("tokenbf_v1", bloomFilterIndexCreator); - registerValidator("tokenbf_v1", bloomFilterIndexValidator); + registerCreator("tokenbf_v1", bloomFilterIndexTextCreator); + registerValidator("tokenbf_v1", bloomFilterIndexTextValidator); - registerCreator("bloom_filter", bloomFilterIndexCreatorNew); - registerValidator("bloom_filter", bloomFilterIndexValidatorNew); + registerCreator("bloom_filter", bloomFilterIndexCreator); + registerValidator("bloom_filter", bloomFilterIndexValidator); registerCreator("hypothesis", hypothesisIndexCreator); registerValidator("hypothesis", hypothesisIndexValidator); diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 8fdadb4e5eb..900e6b6658c 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -221,12 +221,12 @@ void minmaxIndexValidator(const IndexDescription & index, bool attach); MergeTreeIndexPtr setIndexCreator(const IndexDescription & index); void setIndexValidator(const IndexDescription & index, bool attach); +MergeTreeIndexPtr bloomFilterIndexTextCreator(const IndexDescription & index); +void bloomFilterIndexTextValidator(const IndexDescription & index, bool attach); + MergeTreeIndexPtr bloomFilterIndexCreator(const IndexDescription & index); void bloomFilterIndexValidator(const IndexDescription & index, bool attach); -MergeTreeIndexPtr bloomFilterIndexCreatorNew(const IndexDescription & index); -void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach); - MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index); void hypothesisIndexValidator(const IndexDescription & index, bool attach); diff --git a/src/Storages/tests/gtest_SplitTokenExtractor.cpp b/src/Storages/tests/gtest_SplitTokenExtractor.cpp index 62389639c11..e01673359bd 100644 --- a/src/Storages/tests/gtest_SplitTokenExtractor.cpp +++ b/src/Storages/tests/gtest_SplitTokenExtractor.cpp @@ -1,4 +1,4 @@ -#include +#include #include