ClickHouse/src/Storages/MergeTree/MergeTreeIndexInverted.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

182 lines
5.9 KiB
C++
Raw Normal View History

2022-06-24 01:56:15 +00:00
#pragma once
#include <Interpreters/GinFilter.h>
2023-01-20 11:47:42 +00:00
#include <Interpreters/ITokenExtractor.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <base/types.h>
#include <atomic>
#include <memory>
2022-06-24 01:56:15 +00:00
namespace DB
{
2023-01-20 12:01:53 +00:00
struct MergeTreeIndexGranuleInverted final : public IMergeTreeIndexGranule
2022-06-24 01:56:15 +00:00
{
2023-01-20 12:01:53 +00:00
explicit MergeTreeIndexGranuleInverted(
2022-06-24 01:56:15 +00:00
const String & index_name_,
size_t columns_number,
const GinFilterParameters & params_);
2023-01-20 12:01:53 +00:00
~MergeTreeIndexGranuleInverted() override = default;
2022-06-24 01:56:15 +00:00
void serializeBinary(WriteBuffer & ostr) const override;
void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override;
bool empty() const override { return !has_elems; }
String index_name;
GinFilterParameters params;
2023-01-20 11:52:04 +00:00
GinFilters gin_filters;
2022-06-24 01:56:15 +00:00
bool has_elems;
};
2023-01-20 12:01:53 +00:00
/// Shared-ownership pointer to an inverted-index granule.
using MergeTreeIndexGranuleInvertedPtr = std::shared_ptr<MergeTreeIndexGranuleInverted>;
2022-06-24 01:56:15 +00:00
2023-01-20 12:01:53 +00:00
struct MergeTreeIndexAggregatorInverted final : IMergeTreeIndexAggregator
2022-06-24 01:56:15 +00:00
{
2023-01-20 12:01:53 +00:00
explicit MergeTreeIndexAggregatorInverted(
2022-06-24 01:56:15 +00:00
GinIndexStorePtr store_,
const Names & index_columns_,
const String & index_name_,
const GinFilterParameters & params_,
TokenExtractorPtr token_extractor_);
2023-01-20 12:01:53 +00:00
~MergeTreeIndexAggregatorInverted() override = default;
2022-06-24 01:56:15 +00:00
bool empty() const override { return !granule || granule->empty(); }
MergeTreeIndexGranulePtr getGranuleAndReset() override;
void update(const Block & block, size_t * pos, size_t limit) override;
2023-01-20 11:47:42 +00:00
void addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit);
2022-06-24 01:56:15 +00:00
GinIndexStorePtr store;
Names index_columns;
const String index_name;
const GinFilterParameters params;
2022-06-24 01:56:15 +00:00
TokenExtractorPtr token_extractor;
2023-01-20 12:01:53 +00:00
MergeTreeIndexGranuleInvertedPtr granule;
2022-06-24 01:56:15 +00:00
};
2023-01-20 12:01:53 +00:00
class MergeTreeConditionInverted final : public IMergeTreeIndexCondition, WithContext
2022-06-24 01:56:15 +00:00
{
public:
2023-01-20 12:01:53 +00:00
MergeTreeConditionInverted(
2022-06-24 01:56:15 +00:00
const SelectQueryInfo & query_info,
ContextPtr context,
const Block & index_sample_block,
const GinFilterParameters & params_,
TokenExtractorPtr token_extactor_);
2023-01-20 12:01:53 +00:00
~MergeTreeConditionInverted() override = default;
2022-06-24 01:56:15 +00:00
bool alwaysUnknownOrTrue() const override;
bool mayBeTrueOnGranule([[maybe_unused]]MergeTreeIndexGranulePtr idx_granule) const override
{
/// should call mayBeTrueOnGranuleInPart instead
assert(false);
return false;
}
2023-01-20 11:47:42 +00:00
bool mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule, [[maybe_unused]] PostingsCacheForStore & cache_store) const;
2022-06-24 01:56:15 +00:00
private:
struct KeyTuplePositionMapping
{
KeyTuplePositionMapping(size_t tuple_index_, size_t key_index_) : tuple_index(tuple_index_), key_index(key_index_) {}
size_t tuple_index;
size_t key_index;
};
/// Uses RPN like KeyCondition
struct RPNElement
{
enum Function
{
/// Atoms of a Boolean expression.
FUNCTION_EQUALS,
FUNCTION_NOT_EQUALS,
FUNCTION_HAS,
FUNCTION_IN,
FUNCTION_NOT_IN,
FUNCTION_MULTI_SEARCH,
FUNCTION_UNKNOWN, /// Can take any value.
/// Operators of the logical expression.
FUNCTION_NOT,
FUNCTION_AND,
FUNCTION_OR,
/// Constants
ALWAYS_FALSE,
ALWAYS_TRUE,
};
RPNElement( /// NOLINT
Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr<GinFilter> && const_gin_filter_ = nullptr)
: function(function_), key_column(key_column_), gin_filter(std::move(const_gin_filter_)) {}
Function function = FUNCTION_UNKNOWN;
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH
size_t key_column;
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
std::unique_ptr<GinFilter> gin_filter;
/// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH
2023-01-20 11:52:04 +00:00
std::vector<GinFilters> set_gin_filters;
2022-06-24 01:56:15 +00:00
/// For FUNCTION_IN and FUNCTION_NOT_IN
std::vector<size_t> set_key_position;
};
using RPN = std::vector<RPNElement>;
bool traverseAtomAST(const RPNBuilderTreeNode & node, RPNElement & out);
2022-06-24 01:56:15 +00:00
bool traverseASTEquals(
const String & function_name,
const RPNBuilderTreeNode & key_ast,
2022-06-24 01:56:15 +00:00
const DataTypePtr & value_type,
const Field & value_field,
RPNElement & out);
bool tryPrepareSetGinFilter(const RPNBuilderTreeNode & lhs, const RPNBuilderTreeNode & rhs, RPNElement & out);
2022-06-24 01:56:15 +00:00
static bool createFunctionEqualsCondition(
RPNElement & out, const Field & value, const GinFilterParameters & params, TokenExtractorPtr token_extractor);
2022-12-31 17:06:56 +00:00
const Block & header;
2022-06-24 01:56:15 +00:00
GinFilterParameters params;
TokenExtractorPtr token_extractor;
RPN rpn;
/// Sets from syntax analyzer.
PreparedSetsPtr prepared_sets;
2022-06-24 01:56:15 +00:00
};
2023-01-20 12:01:53 +00:00
class MergeTreeIndexInverted final : public IMergeTreeIndex
2022-06-24 01:56:15 +00:00
{
public:
2023-01-20 12:01:53 +00:00
MergeTreeIndexInverted(
2022-06-24 01:56:15 +00:00
const IndexDescription & index_,
const GinFilterParameters & params_,
std::unique_ptr<ITokenExtractor> && token_extractor_)
: IMergeTreeIndex(index_)
, params(params_)
, token_extractor(std::move(token_extractor_)) {}
2023-01-20 12:01:53 +00:00
~MergeTreeIndexInverted() override = default;
2022-06-24 01:56:15 +00:00
MergeTreeIndexGranulePtr createIndexGranule() const override;
MergeTreeIndexAggregatorPtr createIndexAggregator() const override;
2023-01-20 11:47:42 +00:00
MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store) const override;
MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override;
2022-06-24 01:56:15 +00:00
bool mayBenefitFromIndexForIn(const ASTPtr & node) const override;
GinFilterParameters params;
/// Function for selecting next token.
std::unique_ptr<ITokenExtractor> token_extractor;
};
}