2018-12-26 12:19:24 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <unordered_map>
|
|
|
|
#include <vector>
|
2018-12-26 17:34:44 +00:00
|
|
|
#include <memory>
|
|
|
|
#include <Core/Block.h>
|
2020-05-27 18:38:34 +00:00
|
|
|
#include <Storages/StorageInMemoryMetadata.h>
|
2019-01-02 14:24:26 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartChecksum.h>
|
2018-12-29 11:12:41 +00:00
|
|
|
#include <Storages/SelectQueryInfo.h>
|
|
|
|
#include <Storages/MergeTree/MarkRange.h>
|
2018-12-26 17:34:44 +00:00
|
|
|
#include <Interpreters/ExpressionActions.h>
|
2019-03-05 10:55:26 +00:00
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2019-01-05 09:26:02 +00:00
|
|
|
/// Prefix that IMergeTreeIndex::getFileName() puts in front of the index name.
constexpr const char * INDEX_FILE_PREFIX = "skp_idx_";
|
|
|
|
|
2018-12-26 12:19:24 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-02-06 07:49:18 +00:00
|
|
|
/// Stores some info about a single block of data.
|
|
|
|
struct IMergeTreeIndexGranule
|
2019-01-03 16:47:42 +00:00
|
|
|
{
|
2019-02-06 07:49:18 +00:00
|
|
|
virtual ~IMergeTreeIndexGranule() = default;
|
2019-01-03 16:47:42 +00:00
|
|
|
|
2019-01-04 14:33:38 +00:00
|
|
|
virtual void serializeBinary(WriteBuffer & ostr) const = 0;
|
2019-01-07 16:49:34 +00:00
|
|
|
virtual void deserializeBinary(ReadBuffer & istr) = 0;
|
2019-01-03 16:47:42 +00:00
|
|
|
|
2019-01-04 14:33:38 +00:00
|
|
|
virtual bool empty() const = 0;
|
2019-03-08 15:36:39 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/// Shared-ownership handle to a granule, and a sequence of such handles.
using MergeTreeIndexGranulePtr = std::shared_ptr<IMergeTreeIndexGranule>;
using MergeTreeIndexGranules = std::vector<std::shared_ptr<IMergeTreeIndexGranule>>;
|
|
|
|
|
|
|
|
|
|
|
|
/// Aggregates info about a single block of data.
|
|
|
|
struct IMergeTreeIndexAggregator
|
|
|
|
{
|
|
|
|
virtual ~IMergeTreeIndexAggregator() = default;
|
|
|
|
|
|
|
|
virtual bool empty() const = 0;
|
2019-03-08 19:52:21 +00:00
|
|
|
virtual MergeTreeIndexGranulePtr getGranuleAndReset() = 0;
|
2019-02-06 07:49:18 +00:00
|
|
|
|
|
|
|
/// Updates the stored info using rows of the specified block.
|
|
|
|
/// Reads no more than `limit` rows.
|
|
|
|
/// After finishing updating `pos` will store the position of the first row which was not read.
|
2019-02-19 01:47:27 +00:00
|
|
|
virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
|
2019-01-03 16:47:42 +00:00
|
|
|
};
|
|
|
|
|
2019-03-08 15:36:39 +00:00
|
|
|
/// Shared-ownership handle to an aggregator, and a sequence of such handles.
using MergeTreeIndexAggregatorPtr = std::shared_ptr<IMergeTreeIndexAggregator>;
using MergeTreeIndexAggregators = std::vector<std::shared_ptr<IMergeTreeIndexAggregator>>;
|
2019-01-03 16:47:42 +00:00
|
|
|
|
2019-02-06 07:49:18 +00:00
|
|
|
|
2019-01-07 12:51:14 +00:00
|
|
|
/// Condition on the index.
|
2019-06-19 15:30:48 +00:00
|
|
|
class IMergeTreeIndexCondition
|
2019-01-26 06:26:49 +00:00
|
|
|
{
|
2019-01-07 12:51:14 +00:00
|
|
|
public:
|
2019-06-19 15:30:48 +00:00
|
|
|
virtual ~IMergeTreeIndexCondition() = default;
|
2019-01-07 12:51:14 +00:00
|
|
|
/// Checks if this index is useful for query.
|
|
|
|
virtual bool alwaysUnknownOrTrue() const = 0;
|
|
|
|
|
2019-01-08 17:27:44 +00:00
|
|
|
virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0;
|
2019-01-06 15:22:04 +00:00
|
|
|
};
|
|
|
|
|
2019-06-19 15:30:48 +00:00
|
|
|
/// Shared-ownership handle to an index condition.
using MergeTreeIndexConditionPtr = std::shared_ptr<IMergeTreeIndexCondition>;
|
2019-01-07 12:51:14 +00:00
|
|
|
|
2019-01-06 15:22:04 +00:00
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
struct IMergeTreeIndex
|
2018-12-26 12:19:24 +00:00
|
|
|
{
|
2020-05-28 13:09:03 +00:00
|
|
|
IMergeTreeIndex(const IndexDescription & index_)
|
2020-05-27 18:38:34 +00:00
|
|
|
: index(index_)
|
|
|
|
{
|
|
|
|
}
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2019-02-06 07:49:18 +00:00
|
|
|
virtual ~IMergeTreeIndex() = default;
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2019-01-05 09:26:02 +00:00
|
|
|
/// gets filename without extension
|
2020-05-27 18:38:34 +00:00
|
|
|
String getFileName() const { return INDEX_FILE_PREFIX + index.name; }
|
2019-01-05 09:26:02 +00:00
|
|
|
|
2019-02-25 08:43:19 +00:00
|
|
|
/// Checks whether the column is in data skipping index.
|
|
|
|
virtual bool mayBenefitFromIndexForIn(const ASTPtr & node) const = 0;
|
|
|
|
|
2019-01-04 14:33:38 +00:00
|
|
|
virtual MergeTreeIndexGranulePtr createIndexGranule() const = 0;
|
2020-05-27 18:38:34 +00:00
|
|
|
|
2019-03-08 19:52:21 +00:00
|
|
|
virtual MergeTreeIndexAggregatorPtr createIndexAggregator() const = 0;
|
2019-01-04 14:33:38 +00:00
|
|
|
|
2019-06-19 15:30:48 +00:00
|
|
|
virtual MergeTreeIndexConditionPtr createIndexCondition(
|
2021-04-10 23:33:54 +00:00
|
|
|
const SelectQueryInfo & query_info, ContextPtr context) const = 0;
|
2019-01-04 15:54:34 +00:00
|
|
|
|
2020-05-27 18:38:34 +00:00
|
|
|
Names getColumnsRequiredForIndexCalc() const { return index.expression->getRequiredColumns(); }
|
2019-08-28 18:23:20 +00:00
|
|
|
|
2020-05-28 13:09:03 +00:00
|
|
|
const IndexDescription & index;
|
2018-12-26 12:19:24 +00:00
|
|
|
};
|
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
/// Shared-ownership handle to an immutable index, and a sequence of such handles.
using MergeTreeIndexPtr = std::shared_ptr<const IMergeTreeIndex>;
using MergeTreeIndices = std::vector<std::shared_ptr<const IMergeTreeIndex>>;
|
2019-01-05 18:33:30 +00:00
|
|
|
|
|
|
|
|
2019-08-22 03:24:05 +00:00
|
|
|
class MergeTreeIndexFactory : private boost::noncopyable
|
2018-12-26 12:19:24 +00:00
|
|
|
{
|
|
|
|
public:
|
2019-08-22 03:24:05 +00:00
|
|
|
static MergeTreeIndexFactory & instance();
|
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
using Creator = std::function<MergeTreeIndexPtr(const IndexDescription & index)>;
|
2019-01-07 18:53:51 +00:00
|
|
|
|
2020-05-28 13:09:03 +00:00
|
|
|
using Validator = std::function<void(const IndexDescription & index, bool attach)>;
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2020-05-28 13:09:03 +00:00
|
|
|
void validate(const IndexDescription & index, bool attach) const;
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
MergeTreeIndexPtr get(const IndexDescription & index) const;
|
2020-05-28 12:37:05 +00:00
|
|
|
|
2020-05-28 13:09:03 +00:00
|
|
|
MergeTreeIndices getMany(const std::vector<IndexDescription> & indices) const;
|
2020-05-28 12:37:05 +00:00
|
|
|
|
|
|
|
void registerCreator(const std::string & index_type, Creator creator);
|
2020-05-29 14:10:09 +00:00
|
|
|
void registerValidator(const std::string & index_type, Validator validator);
|
2018-12-26 12:19:24 +00:00
|
|
|
|
2018-12-26 17:34:44 +00:00
|
|
|
protected:
|
2019-02-06 09:05:05 +00:00
|
|
|
MergeTreeIndexFactory();
|
2018-12-26 17:34:44 +00:00
|
|
|
|
2018-12-26 12:19:24 +00:00
|
|
|
private:
|
2020-05-28 12:37:05 +00:00
|
|
|
using Creators = std::unordered_map<std::string, Creator>;
|
|
|
|
using Validators = std::unordered_map<std::string, Validator>;
|
|
|
|
Creators creators;
|
|
|
|
Validators validators;
|
2018-12-26 12:19:24 +00:00
|
|
|
};
|
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
MergeTreeIndexPtr minmaxIndexCreator(const IndexDescription & index);
|
2020-05-28 13:09:03 +00:00
|
|
|
void minmaxIndexValidator(const IndexDescription & index, bool attach);
|
2020-05-28 12:37:05 +00:00
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
MergeTreeIndexPtr setIndexCreator(const IndexDescription & index);
|
2020-05-28 13:09:03 +00:00
|
|
|
void setIndexValidator(const IndexDescription & index, bool attach);
|
2020-05-28 12:37:05 +00:00
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
MergeTreeIndexPtr bloomFilterIndexCreator(const IndexDescription & index);
|
2020-05-28 13:09:03 +00:00
|
|
|
void bloomFilterIndexValidator(const IndexDescription & index, bool attach);
|
2019-12-15 06:34:43 +00:00
|
|
|
|
2020-05-28 13:45:08 +00:00
|
|
|
MergeTreeIndexPtr bloomFilterIndexCreatorNew(const IndexDescription & index);
|
2020-05-28 13:09:03 +00:00
|
|
|
void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach);
|
2020-05-28 13:45:08 +00:00
|
|
|
|
2019-01-26 06:26:49 +00:00
|
|
|
}
|