mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #4286 from nikvas0/nikvas0/index_fix
Data Skipping Indices fix
This commit is contained in:
commit
9650c4a0da
@ -460,14 +460,21 @@ ColumnsDescription InterpreterCreateQuery::setColumns(
|
|||||||
ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const
|
ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const
|
||||||
{
|
{
|
||||||
ColumnsDescription res;
|
ColumnsDescription res;
|
||||||
|
IndicesDescription indices;
|
||||||
|
|
||||||
if (create.columns_list && create.columns_list->columns)
|
if (create.columns_list)
|
||||||
{
|
{
|
||||||
|
if (create.columns_list->columns)
|
||||||
res = getColumnsDescription(*create.columns_list->columns, context);
|
res = getColumnsDescription(*create.columns_list->columns, context);
|
||||||
|
if (create.columns_list->indices)
|
||||||
|
for (const auto & index : create.columns_list->indices->children)
|
||||||
|
indices.indices.push_back(
|
||||||
|
std::dynamic_pointer_cast<ASTIndexDeclaration>(index->clone()));
|
||||||
}
|
}
|
||||||
else if (!create.as_table.empty())
|
else if (!create.as_table.empty())
|
||||||
{
|
{
|
||||||
res = as_storage->getColumns();
|
res = as_storage->getColumns();
|
||||||
|
indices = as_storage->getIndicesDescription();
|
||||||
}
|
}
|
||||||
else if (create.select)
|
else if (create.select)
|
||||||
{
|
{
|
||||||
@ -479,6 +486,8 @@ ColumnsDescription InterpreterCreateQuery::setColumns(
|
|||||||
|
|
||||||
/// Even if query has list of columns, canonicalize it (unfold Nested columns).
|
/// Even if query has list of columns, canonicalize it (unfold Nested columns).
|
||||||
ASTPtr new_columns = formatColumns(res);
|
ASTPtr new_columns = formatColumns(res);
|
||||||
|
ASTPtr new_indices = formatIndices(indices);
|
||||||
|
|
||||||
if (!create.columns_list)
|
if (!create.columns_list)
|
||||||
{
|
{
|
||||||
auto new_columns_list = std::make_shared<ASTColumns>();
|
auto new_columns_list = std::make_shared<ASTColumns>();
|
||||||
@ -490,6 +499,11 @@ ColumnsDescription InterpreterCreateQuery::setColumns(
|
|||||||
else
|
else
|
||||||
create.columns_list->set(create.columns_list->columns, new_columns);
|
create.columns_list->set(create.columns_list->columns, new_columns);
|
||||||
|
|
||||||
|
if (new_indices && create.columns_list->indices)
|
||||||
|
create.columns_list->replace(create.columns_list->indices, new_indices);
|
||||||
|
else if (new_indices)
|
||||||
|
create.columns_list->set(create.columns_list->indices, new_indices);
|
||||||
|
|
||||||
/// Check for duplicates
|
/// Check for duplicates
|
||||||
std::set<String> all_columns;
|
std::set<String> all_columns;
|
||||||
auto check_column_already_exists = [&all_columns](const NameAndTypePair & column_name_and_type)
|
auto check_column_already_exists = [&all_columns](const NameAndTypePair & column_name_and_type)
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
#include <Core/Types.h>
|
#include <Core/Types.h>
|
||||||
#include <Common/FieldVisitors.h>
|
#include <Common/FieldVisitors.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Parsers/ASTExpressionList.h>
|
#include <Parsers/ASTExpressionList.h>
|
||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Parsers/IAST.h>
|
#include <Parsers/IAST.h>
|
||||||
@ -21,7 +22,7 @@ public:
|
|||||||
String name;
|
String name;
|
||||||
IAST * expr;
|
IAST * expr;
|
||||||
ASTFunction * type;
|
ASTFunction * type;
|
||||||
Field granularity;
|
UInt64 granularity;
|
||||||
|
|
||||||
/** Get the text that identifies this element. */
|
/** Get the text that identifies this element. */
|
||||||
String getID(char) const override { return "Index"; }
|
String getID(char) const override { return "Index"; }
|
||||||
@ -52,7 +53,7 @@ public:
|
|||||||
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
|
||||||
type->formatImpl(s, state, frame);
|
type->formatImpl(s, state, frame);
|
||||||
s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
|
s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
|
||||||
s.ostr << applyVisitor(FieldVisitorToString(), granularity);
|
s.ostr << toString(granularity);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe
|
|||||||
|
|
||||||
auto index = std::make_shared<ASTIndexDeclaration>();
|
auto index = std::make_shared<ASTIndexDeclaration>();
|
||||||
index->name = typeid_cast<const ASTIdentifier &>(*name).name;
|
index->name = typeid_cast<const ASTIdentifier &>(*name).name;
|
||||||
index->granularity = typeid_cast<const ASTLiteral &>(*granularity).value;
|
index->granularity = typeid_cast<const ASTLiteral &>(*granularity).value.get<UInt64>();
|
||||||
index->set(index->expr, expr);
|
index->set(index->expr, expr);
|
||||||
index->set(index->type, type);
|
index->set(index->type, type);
|
||||||
node = index;
|
node = index;
|
||||||
|
@ -6,11 +6,11 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
using IndicesAsts = std::vector<std::shared_ptr<ASTIndexDeclaration>>;
|
using IndicesASTs = std::vector<std::shared_ptr<ASTIndexDeclaration>>;
|
||||||
|
|
||||||
struct IndicesDescription
|
struct IndicesDescription
|
||||||
{
|
{
|
||||||
IndicesAsts indices;
|
IndicesASTs indices;
|
||||||
|
|
||||||
IndicesDescription() = default;
|
IndicesDescription() = default;
|
||||||
|
|
||||||
|
@ -1149,7 +1149,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
|
void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
|
||||||
const IndicesAsts & old_indices, const IndicesAsts & new_indices, ExpressionActionsPtr & out_expression,
|
const IndicesASTs & old_indices, const IndicesASTs & new_indices, ExpressionActionsPtr & out_expression,
|
||||||
NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
|
NameToNameMap & out_rename_map, bool & out_force_update_metadata) const
|
||||||
{
|
{
|
||||||
out_expression = nullptr;
|
out_expression = nullptr;
|
||||||
@ -1309,7 +1309,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
|
|||||||
MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
|
MergeTreeData::AlterDataPartTransactionPtr MergeTreeData::alterDataPart(
|
||||||
const DataPartPtr & part,
|
const DataPartPtr & part,
|
||||||
const NamesAndTypesList & new_columns,
|
const NamesAndTypesList & new_columns,
|
||||||
const IndicesAsts & new_indices,
|
const IndicesASTs & new_indices,
|
||||||
bool skip_sanity_checks)
|
bool skip_sanity_checks)
|
||||||
{
|
{
|
||||||
ExpressionActionsPtr expression;
|
ExpressionActionsPtr expression;
|
||||||
|
@ -490,7 +490,7 @@ public:
|
|||||||
AlterDataPartTransactionPtr alterDataPart(
|
AlterDataPartTransactionPtr alterDataPart(
|
||||||
const DataPartPtr & part,
|
const DataPartPtr & part,
|
||||||
const NamesAndTypesList & new_columns,
|
const NamesAndTypesList & new_columns,
|
||||||
const IndicesAsts & new_indices,
|
const IndicesASTs & new_indices,
|
||||||
bool skip_sanity_checks);
|
bool skip_sanity_checks);
|
||||||
|
|
||||||
/// Freezes all parts.
|
/// Freezes all parts.
|
||||||
@ -746,7 +746,7 @@ private:
|
|||||||
/// Files to be deleted are mapped to an empty string in out_rename_map.
|
/// Files to be deleted are mapped to an empty string in out_rename_map.
|
||||||
/// If part == nullptr, just checks that all type conversions are possible.
|
/// If part == nullptr, just checks that all type conversions are possible.
|
||||||
void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
|
void createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns,
|
||||||
const IndicesAsts & old_indices, const IndicesAsts & new_indices,
|
const IndicesASTs & old_indices, const IndicesASTs & new_indices,
|
||||||
ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const;
|
ExpressionActionsPtr & out_expression, NameToNameMap & out_rename_map, bool & out_force_update_metadata) const;
|
||||||
|
|
||||||
/// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked.
|
/// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked.
|
||||||
|
@ -520,6 +520,14 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
|
|
||||||
RangesInDataParts parts_with_ranges;
|
RangesInDataParts parts_with_ranges;
|
||||||
|
|
||||||
|
std::vector<std::pair<MergeTreeIndexPtr, IndexConditionPtr>> useful_indices;
|
||||||
|
for (const auto & index : data.skip_indices)
|
||||||
|
{
|
||||||
|
auto condition = index->createIndexCondition(query_info, context);
|
||||||
|
if (!condition->alwaysUnknownOrTrue())
|
||||||
|
useful_indices.emplace_back(index, condition);
|
||||||
|
}
|
||||||
|
|
||||||
/// Let's find what range to read from each part.
|
/// Let's find what range to read from each part.
|
||||||
size_t sum_marks = 0;
|
size_t sum_marks = 0;
|
||||||
size_t sum_ranges = 0;
|
size_t sum_ranges = 0;
|
||||||
@ -532,16 +540,9 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
else
|
else
|
||||||
ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}};
|
ranges.ranges = MarkRanges{MarkRange{0, part->marks_count}};
|
||||||
|
|
||||||
/// It can be done in multiple threads (one thread for each part).
|
for (const auto & index_and_condition : useful_indices)
|
||||||
/// Maybe it should be moved to BlockInputStream, but it can cause some problems.
|
ranges.ranges = filterMarksUsingIndex(
|
||||||
for (const auto & index : data.skip_indices)
|
index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings);
|
||||||
{
|
|
||||||
auto condition = index->createIndexCondition(query_info, context);
|
|
||||||
if (!condition->alwaysUnknownOrTrue())
|
|
||||||
{
|
|
||||||
ranges.ranges = filterMarksUsingIndex(index, condition, part, ranges.ranges, settings);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!ranges.ranges.empty())
|
if (!ranges.ranges.empty())
|
||||||
{
|
{
|
||||||
|
@ -26,7 +26,7 @@ void MergeTreeIndexFactory::registerIndex(const std::string &name, Creator creat
|
|||||||
ErrorCodes::LOGICAL_ERROR);
|
ErrorCodes::LOGICAL_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> MergeTreeIndexFactory::get(
|
std::unique_ptr<IMergeTreeIndex> MergeTreeIndexFactory::get(
|
||||||
const NamesAndTypesList & columns,
|
const NamesAndTypesList & columns,
|
||||||
std::shared_ptr<ASTIndexDeclaration> node,
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
const Context & context) const
|
const Context & context) const
|
||||||
@ -54,4 +54,21 @@ std::unique_ptr<MergeTreeIndex> MergeTreeIndexFactory::get(
|
|||||||
return it->second(columns, node, context);
|
return it->second(columns, node, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
std::unique_ptr<IMergeTreeIndex> minmaxIndexCreator(
|
||||||
|
const NamesAndTypesList & columns,
|
||||||
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
|
const Context & context);
|
||||||
|
|
||||||
|
std::unique_ptr<IMergeTreeIndex> setIndexCreator(
|
||||||
|
const NamesAndTypesList & columns,
|
||||||
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
|
const Context & context);
|
||||||
|
|
||||||
|
MergeTreeIndexFactory::MergeTreeIndexFactory()
|
||||||
|
{
|
||||||
|
registerIndex("minmax", minmaxIndexCreator);
|
||||||
|
registerIndex("set", setIndexCreator);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -18,48 +18,51 @@ namespace DB
|
|||||||
{
|
{
|
||||||
|
|
||||||
class MergeTreeData;
|
class MergeTreeData;
|
||||||
class MergeTreeIndex;
|
class IMergeTreeIndex;
|
||||||
|
|
||||||
using MergeTreeIndexPtr = std::shared_ptr<const MergeTreeIndex>;
|
using MergeTreeIndexPtr = std::shared_ptr<const IMergeTreeIndex>;
|
||||||
using MutableMergeTreeIndexPtr = std::shared_ptr<MergeTreeIndex>;
|
using MutableMergeTreeIndexPtr = std::shared_ptr<IMergeTreeIndex>;
|
||||||
|
|
||||||
|
|
||||||
struct MergeTreeIndexGranule
|
/// Stores some info about a single block of data.
|
||||||
|
struct IMergeTreeIndexGranule
|
||||||
{
|
{
|
||||||
virtual ~MergeTreeIndexGranule() = default;
|
virtual ~IMergeTreeIndexGranule() = default;
|
||||||
|
|
||||||
virtual void serializeBinary(WriteBuffer & ostr) const = 0;
|
virtual void serializeBinary(WriteBuffer & ostr) const = 0;
|
||||||
virtual void deserializeBinary(ReadBuffer & istr) = 0;
|
virtual void deserializeBinary(ReadBuffer & istr) = 0;
|
||||||
|
|
||||||
virtual String toString() const = 0;
|
|
||||||
virtual bool empty() const = 0;
|
virtual bool empty() const = 0;
|
||||||
|
|
||||||
|
/// Updates the stored info using rows of the specified block.
|
||||||
|
/// Reads no more than `limit` rows.
|
||||||
|
/// After finishing updating `pos` will store the position of the first row which was not read.
|
||||||
virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
|
virtual void update(const Block & block, size_t * pos, size_t limit) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
using MergeTreeIndexGranulePtr = std::shared_ptr<IMergeTreeIndexGranule>;
|
||||||
using MergeTreeIndexGranulePtr = std::shared_ptr<MergeTreeIndexGranule>;
|
|
||||||
using MergeTreeIndexGranules = std::vector<MergeTreeIndexGranulePtr>;
|
using MergeTreeIndexGranules = std::vector<MergeTreeIndexGranulePtr>;
|
||||||
|
|
||||||
|
|
||||||
/// Condition on the index.
|
/// Condition on the index.
|
||||||
class IndexCondition
|
class IIndexCondition
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual ~IndexCondition() = default;
|
virtual ~IIndexCondition() = default;
|
||||||
/// Checks if this index is useful for query.
|
/// Checks if this index is useful for query.
|
||||||
virtual bool alwaysUnknownOrTrue() const = 0;
|
virtual bool alwaysUnknownOrTrue() const = 0;
|
||||||
|
|
||||||
virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0;
|
virtual bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr granule) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
using IndexConditionPtr = std::shared_ptr<IndexCondition>;
|
using IndexConditionPtr = std::shared_ptr<IIndexCondition>;
|
||||||
|
|
||||||
|
|
||||||
/// Structure for storing basic index info like columns, expression, arguments, ...
|
/// Structure for storing basic index info like columns, expression, arguments, ...
|
||||||
class MergeTreeIndex
|
class IMergeTreeIndex
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MergeTreeIndex(
|
IMergeTreeIndex(
|
||||||
String name,
|
String name,
|
||||||
ExpressionActionsPtr expr,
|
ExpressionActionsPtr expr,
|
||||||
const Names & columns,
|
const Names & columns,
|
||||||
@ -73,7 +76,7 @@ public:
|
|||||||
, header(header)
|
, header(header)
|
||||||
, granularity(granularity) {}
|
, granularity(granularity) {}
|
||||||
|
|
||||||
virtual ~MergeTreeIndex() = default;
|
virtual ~IMergeTreeIndex() = default;
|
||||||
|
|
||||||
/// gets filename without extension
|
/// gets filename without extension
|
||||||
String getFileName() const { return INDEX_FILE_PREFIX + name; }
|
String getFileName() const { return INDEX_FILE_PREFIX + name; }
|
||||||
@ -91,7 +94,6 @@ public:
|
|||||||
size_t granularity;
|
size_t granularity;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
using MergeTreeIndices = std::vector<MutableMergeTreeIndexPtr>;
|
using MergeTreeIndices = std::vector<MutableMergeTreeIndexPtr>;
|
||||||
|
|
||||||
|
|
||||||
@ -101,12 +103,12 @@ class MergeTreeIndexFactory : public ext::singleton<MergeTreeIndexFactory>
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
using Creator = std::function<
|
using Creator = std::function<
|
||||||
std::unique_ptr<MergeTreeIndex>(
|
std::unique_ptr<IMergeTreeIndex>(
|
||||||
const NamesAndTypesList & columns,
|
const NamesAndTypesList & columns,
|
||||||
std::shared_ptr<ASTIndexDeclaration> node,
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
const Context & context)>;
|
const Context & context)>;
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> get(
|
std::unique_ptr<IMergeTreeIndex> get(
|
||||||
const NamesAndTypesList & columns,
|
const NamesAndTypesList & columns,
|
||||||
std::shared_ptr<ASTIndexDeclaration> node,
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
const Context & context) const;
|
const Context & context) const;
|
||||||
@ -116,7 +118,7 @@ public:
|
|||||||
const auto & getAllIndexes() const { return indexes; }
|
const auto & getAllIndexes() const { return indexes; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
MergeTreeIndexFactory() = default;
|
MergeTreeIndexFactory();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
using Indexes = std::unordered_map<std::string, Creator>;
|
using Indexes = std::unordered_map<std::string, Creator>;
|
||||||
|
@ -17,7 +17,7 @@ namespace ErrorCodes
|
|||||||
|
|
||||||
|
|
||||||
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index)
|
MergeTreeMinMaxGranule::MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index)
|
||||||
: MergeTreeIndexGranule(), index(index), parallelogram()
|
: IMergeTreeIndexGranule(), index(index), parallelogram()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -52,22 +52,13 @@ void MergeTreeMinMaxGranule::deserializeBinary(ReadBuffer & istr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String MergeTreeMinMaxGranule::toString() const
|
|
||||||
{
|
|
||||||
String res = "";
|
|
||||||
|
|
||||||
for (size_t i = 0; i < parallelogram.size(); ++i)
|
|
||||||
{
|
|
||||||
res += "["
|
|
||||||
+ applyVisitor(FieldVisitorToString(), parallelogram[i].left) + ", "
|
|
||||||
+ applyVisitor(FieldVisitorToString(), parallelogram[i].right) + "]";
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit)
|
void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t limit)
|
||||||
{
|
{
|
||||||
|
if (*pos >= block.rows())
|
||||||
|
throw Exception(
|
||||||
|
"The provided position is not less than the number of block rows. Position: "
|
||||||
|
+ toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
size_t rows_read = std::min(limit, block.rows() - *pos);
|
size_t rows_read = std::min(limit, block.rows() - *pos);
|
||||||
|
|
||||||
for (size_t i = 0; i < index.columns.size(); ++i)
|
for (size_t i = 0; i < index.columns.size(); ++i)
|
||||||
@ -96,7 +87,7 @@ MinMaxCondition::MinMaxCondition(
|
|||||||
const SelectQueryInfo &query,
|
const SelectQueryInfo &query,
|
||||||
const Context &context,
|
const Context &context,
|
||||||
const MergeTreeMinMaxIndex &index)
|
const MergeTreeMinMaxIndex &index)
|
||||||
: IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {}
|
: IIndexCondition(), index(index), condition(query, context, index.columns, index.expr) {}
|
||||||
|
|
||||||
bool MinMaxCondition::alwaysUnknownOrTrue() const
|
bool MinMaxCondition::alwaysUnknownOrTrue() const
|
||||||
{
|
{
|
||||||
@ -109,7 +100,7 @@ bool MinMaxCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c
|
|||||||
= std::dynamic_pointer_cast<MergeTreeMinMaxGranule>(idx_granule);
|
= std::dynamic_pointer_cast<MergeTreeMinMaxGranule>(idx_granule);
|
||||||
if (!granule)
|
if (!granule)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
"Minmax index condition got wrong granule", ErrorCodes::LOGICAL_ERROR);
|
"Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types);
|
return condition.mayBeTrueInParallelogram(granule->parallelogram, index.data_types);
|
||||||
}
|
}
|
||||||
@ -127,7 +118,7 @@ return std::make_shared<MinMaxCondition>(query, context, *this);
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
|
std::unique_ptr<IMergeTreeIndex> minmaxIndexCreator(
|
||||||
const NamesAndTypesList & new_columns,
|
const NamesAndTypesList & new_columns,
|
||||||
std::shared_ptr<ASTIndexDeclaration> node,
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
const Context & context)
|
const Context & context)
|
||||||
@ -158,7 +149,7 @@ std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
|
|||||||
}
|
}
|
||||||
|
|
||||||
return std::make_unique<MergeTreeMinMaxIndex>(
|
return std::make_unique<MergeTreeMinMaxIndex>(
|
||||||
node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get<size_t>());
|
node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -13,16 +13,14 @@ namespace DB
|
|||||||
class MergeTreeMinMaxIndex;
|
class MergeTreeMinMaxIndex;
|
||||||
|
|
||||||
|
|
||||||
struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule
|
struct MergeTreeMinMaxGranule : public IMergeTreeIndexGranule
|
||||||
{
|
{
|
||||||
explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index);
|
explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index);
|
||||||
|
|
||||||
void serializeBinary(WriteBuffer & ostr) const override;
|
void serializeBinary(WriteBuffer & ostr) const override;
|
||||||
void deserializeBinary(ReadBuffer & istr) override;
|
void deserializeBinary(ReadBuffer & istr) override;
|
||||||
|
|
||||||
String toString() const override;
|
|
||||||
bool empty() const override { return parallelogram.empty(); }
|
bool empty() const override { return parallelogram.empty(); }
|
||||||
|
|
||||||
void update(const Block & block, size_t * pos, size_t limit) override;
|
void update(const Block & block, size_t * pos, size_t limit) override;
|
||||||
|
|
||||||
~MergeTreeMinMaxGranule() override = default;
|
~MergeTreeMinMaxGranule() override = default;
|
||||||
@ -32,7 +30,7 @@ struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class MinMaxCondition : public IndexCondition
|
class MinMaxCondition : public IIndexCondition
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MinMaxCondition(
|
MinMaxCondition(
|
||||||
@ -51,7 +49,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class MergeTreeMinMaxIndex : public MergeTreeIndex
|
class MergeTreeMinMaxIndex : public IMergeTreeIndex
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MergeTreeMinMaxIndex(
|
MergeTreeMinMaxIndex(
|
||||||
@ -61,7 +59,7 @@ public:
|
|||||||
const DataTypes & data_types_,
|
const DataTypes & data_types_,
|
||||||
const Block & header_,
|
const Block & header_,
|
||||||
size_t granularity_)
|
size_t granularity_)
|
||||||
: MergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {}
|
: IMergeTreeIndex(name_, expr_, columns_, data_types_, header_, granularity_) {}
|
||||||
|
|
||||||
~MergeTreeMinMaxIndex() override = default;
|
~MergeTreeMinMaxIndex() override = default;
|
||||||
|
|
||||||
@ -72,7 +70,4 @@ public:
|
|||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> MergeTreeMinMaxIndexCreator(
|
|
||||||
const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/// Class for reading a single column (or index).
|
||||||
class MergeTreeReaderStream
|
class MergeTreeReaderStream
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeUniqueIndex.h>
|
#include <Storages/MergeTree/MergeTreeSetSkippingIndex.h>
|
||||||
|
|
||||||
#include <Interpreters/ExpressionActions.h>
|
#include <Interpreters/ExpressionActions.h>
|
||||||
#include <Interpreters/ExpressionAnalyzer.h>
|
#include <Interpreters/ExpressionAnalyzer.h>
|
||||||
@ -8,8 +8,6 @@
|
|||||||
#include <Parsers/ASTFunction.h>
|
#include <Parsers/ASTFunction.h>
|
||||||
#include <Parsers/ASTLiteral.h>
|
#include <Parsers/ASTLiteral.h>
|
||||||
|
|
||||||
#include <Poco/Logger.h>
|
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -19,13 +17,17 @@ namespace ErrorCodes
|
|||||||
extern const int INCORRECT_QUERY;
|
extern const int INCORRECT_QUERY;
|
||||||
}
|
}
|
||||||
|
|
||||||
MergeTreeUniqueGranule::MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index)
|
/// 0b11 -- can be true and false at the same time
|
||||||
: MergeTreeIndexGranule(), index(index), set(new Set(SizeLimits{}, true))
|
const Field UNKNOWN_FIELD(3);
|
||||||
|
|
||||||
|
|
||||||
|
MergeTreeSetIndexGranule::MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index)
|
||||||
|
: IMergeTreeIndexGranule(), index(index), set(new Set(SizeLimits{}, true))
|
||||||
{
|
{
|
||||||
set->setHeader(index.header);
|
set->setHeader(index.header);
|
||||||
}
|
}
|
||||||
|
|
||||||
void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const
|
void MergeTreeSetIndexGranule::serializeBinary(WriteBuffer & ostr) const
|
||||||
{
|
{
|
||||||
if (empty())
|
if (empty())
|
||||||
throw Exception(
|
throw Exception(
|
||||||
@ -49,7 +51,7 @@ void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr)
|
void MergeTreeSetIndexGranule::deserializeBinary(ReadBuffer & istr)
|
||||||
{
|
{
|
||||||
if (!set->empty())
|
if (!set->empty())
|
||||||
{
|
{
|
||||||
@ -76,31 +78,13 @@ void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr)
|
|||||||
set->insertFromBlock(block);
|
set->insertFromBlock(block);
|
||||||
}
|
}
|
||||||
|
|
||||||
String MergeTreeUniqueGranule::toString() const
|
void MergeTreeSetIndexGranule::update(const Block & new_block, size_t * pos, size_t limit)
|
||||||
{
|
{
|
||||||
String res = "";
|
if (*pos >= new_block.rows())
|
||||||
|
throw Exception(
|
||||||
|
"The provided position is not less than the number of block rows. Position: "
|
||||||
|
+ toString(*pos) + ", Block rows: " + toString(new_block.rows()) + ".", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
const auto & columns = set->getSetElements();
|
|
||||||
for (size_t i = 0; i < index.columns.size(); ++i)
|
|
||||||
{
|
|
||||||
const auto & column = columns[i];
|
|
||||||
res += " [";
|
|
||||||
for (size_t j = 0; j < column->size(); ++j)
|
|
||||||
{
|
|
||||||
if (j != 0)
|
|
||||||
res += ", ";
|
|
||||||
Field field;
|
|
||||||
column->get(j, field);
|
|
||||||
res += applyVisitor(FieldVisitorToString(), field);
|
|
||||||
}
|
|
||||||
res += "]\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_t limit)
|
|
||||||
{
|
|
||||||
size_t rows_read = std::min(limit, new_block.rows() - *pos);
|
size_t rows_read = std::min(limit, new_block.rows() - *pos);
|
||||||
|
|
||||||
if (index.max_rows && size() > index.max_rows)
|
if (index.max_rows && size() > index.max_rows)
|
||||||
@ -126,7 +110,7 @@ void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_
|
|||||||
*pos += rows_read;
|
*pos += rows_read;
|
||||||
}
|
}
|
||||||
|
|
||||||
Block MergeTreeUniqueGranule::getElementsBlock() const
|
Block MergeTreeSetIndexGranule::getElementsBlock() const
|
||||||
{
|
{
|
||||||
if (index.max_rows && size() > index.max_rows)
|
if (index.max_rows && size() > index.max_rows)
|
||||||
return index.header;
|
return index.header;
|
||||||
@ -134,11 +118,11 @@ Block MergeTreeUniqueGranule::getElementsBlock() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
UniqueCondition::UniqueCondition(
|
SetIndexCondition::SetIndexCondition(
|
||||||
const SelectQueryInfo & query,
|
const SelectQueryInfo & query,
|
||||||
const Context & context,
|
const Context & context,
|
||||||
const MergeTreeUniqueIndex &index)
|
const MergeTreeSetSkippingIndex &index)
|
||||||
: IndexCondition(), index(index)
|
: IIndexCondition(), index(index)
|
||||||
{
|
{
|
||||||
for (size_t i = 0, size = index.columns.size(); i < size; ++i)
|
for (size_t i = 0, size = index.columns.size(); i < size; ++i)
|
||||||
{
|
{
|
||||||
@ -151,30 +135,23 @@ UniqueCondition::UniqueCondition(
|
|||||||
|
|
||||||
/// Replace logical functions with bit functions.
|
/// Replace logical functions with bit functions.
|
||||||
/// Working with UInt8: last bit = can be true, previous = can be false.
|
/// Working with UInt8: last bit = can be true, previous = can be false.
|
||||||
ASTPtr new_expression;
|
|
||||||
if (select.where_expression && select.prewhere_expression)
|
if (select.where_expression && select.prewhere_expression)
|
||||||
new_expression = makeASTFunction(
|
expression_ast = makeASTFunction(
|
||||||
"and",
|
"and",
|
||||||
select.where_expression->clone(),
|
select.where_expression->clone(),
|
||||||
select.prewhere_expression->clone());
|
select.prewhere_expression->clone());
|
||||||
else if (select.where_expression)
|
else if (select.where_expression)
|
||||||
new_expression = select.where_expression->clone();
|
expression_ast = select.where_expression->clone();
|
||||||
else if (select.prewhere_expression)
|
else if (select.prewhere_expression)
|
||||||
new_expression = select.prewhere_expression->clone();
|
expression_ast = select.prewhere_expression->clone();
|
||||||
else
|
else
|
||||||
/// 0b11 -- can be true and false at the same time
|
expression_ast = std::make_shared<ASTLiteral>(UNKNOWN_FIELD);
|
||||||
new_expression = std::make_shared<ASTLiteral>(Field(3));
|
|
||||||
|
|
||||||
useless = checkASTAlwaysUnknownOrTrue(new_expression);
|
useless = checkASTUseless(expression_ast);
|
||||||
/// Do not proceed if index is useless for this query.
|
/// Do not proceed if index is useless for this query.
|
||||||
if (useless)
|
if (useless)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
expression_ast = makeASTFunction(
|
|
||||||
"bitAnd",
|
|
||||||
new_expression,
|
|
||||||
std::make_shared<ASTLiteral>(Field(1)));
|
|
||||||
|
|
||||||
traverseAST(expression_ast);
|
traverseAST(expression_ast);
|
||||||
|
|
||||||
auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(
|
auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(
|
||||||
@ -182,17 +159,17 @@ UniqueCondition::UniqueCondition(
|
|||||||
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
|
actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UniqueCondition::alwaysUnknownOrTrue() const
|
bool SetIndexCondition::alwaysUnknownOrTrue() const
|
||||||
{
|
{
|
||||||
return useless;
|
return useless;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
|
bool SetIndexCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const
|
||||||
{
|
{
|
||||||
auto granule = std::dynamic_pointer_cast<MergeTreeUniqueGranule>(idx_granule);
|
auto granule = std::dynamic_pointer_cast<MergeTreeSetIndexGranule>(idx_granule);
|
||||||
if (!granule)
|
if (!granule)
|
||||||
throw Exception(
|
throw Exception(
|
||||||
"Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR);
|
"Unique index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
if (useless)
|
if (useless)
|
||||||
return true;
|
return true;
|
||||||
@ -203,17 +180,16 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c
|
|||||||
Block result = granule->getElementsBlock();
|
Block result = granule->getElementsBlock();
|
||||||
actions->execute(result);
|
actions->execute(result);
|
||||||
|
|
||||||
|
|
||||||
const auto & column = result.getByName(expression_ast->getColumnName()).column;
|
const auto & column = result.getByName(expression_ast->getColumnName()).column;
|
||||||
|
|
||||||
for (size_t i = 0; i < column->size(); ++i)
|
for (size_t i = 0; i < column->size(); ++i)
|
||||||
if (column->getBool(i))
|
if (column->getInt(i) & 1)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UniqueCondition::traverseAST(ASTPtr & node) const
|
void SetIndexCondition::traverseAST(ASTPtr & node) const
|
||||||
{
|
{
|
||||||
if (operatorFromAST(node))
|
if (operatorFromAST(node))
|
||||||
{
|
{
|
||||||
@ -226,10 +202,10 @@ void UniqueCondition::traverseAST(ASTPtr & node) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!atomFromAST(node))
|
if (!atomFromAST(node))
|
||||||
node = std::make_shared<ASTLiteral>(Field(3)); /// can_be_true=1 can_be_false=1
|
node = std::make_shared<ASTLiteral>(UNKNOWN_FIELD);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UniqueCondition::atomFromAST(ASTPtr & node) const
|
bool SetIndexCondition::atomFromAST(ASTPtr & node) const
|
||||||
{
|
{
|
||||||
/// Function, literal or column
|
/// Function, literal or column
|
||||||
|
|
||||||
@ -260,14 +236,14 @@ bool UniqueCondition::atomFromAST(ASTPtr & node) const
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UniqueCondition::operatorFromAST(ASTPtr & node) const
|
bool SetIndexCondition::operatorFromAST(ASTPtr & node) const
|
||||||
{
|
{
|
||||||
/// Functions AND, OR, NOT. Replace with bit*.
|
/// Functions AND, OR, NOT. Replace with bit*.
|
||||||
auto * func = typeid_cast<ASTFunction *>(&*node);
|
auto * func = typeid_cast<ASTFunction *>(&*node);
|
||||||
if (!func)
|
if (!func)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
|
ASTs & args = typeid_cast<ASTExpressionList &>(*func->arguments).children;
|
||||||
|
|
||||||
if (func->name == "not")
|
if (func->name == "not")
|
||||||
{
|
{
|
||||||
@ -277,16 +253,50 @@ bool UniqueCondition::operatorFromAST(ASTPtr & node) const
|
|||||||
func->name = "__bitSwapLastTwo";
|
func->name = "__bitSwapLastTwo";
|
||||||
}
|
}
|
||||||
else if (func->name == "and" || func->name == "indexHint")
|
else if (func->name == "and" || func->name == "indexHint")
|
||||||
func->name = "bitAnd";
|
{
|
||||||
|
auto last_arg = args.back();
|
||||||
|
args.pop_back();
|
||||||
|
|
||||||
|
ASTPtr new_func;
|
||||||
|
if (args.size() > 1)
|
||||||
|
new_func = makeASTFunction(
|
||||||
|
"bitAnd",
|
||||||
|
node,
|
||||||
|
last_arg);
|
||||||
|
else
|
||||||
|
new_func = makeASTFunction(
|
||||||
|
"bitAnd",
|
||||||
|
args.back(),
|
||||||
|
last_arg);
|
||||||
|
|
||||||
|
node = new_func;
|
||||||
|
}
|
||||||
else if (func->name == "or")
|
else if (func->name == "or")
|
||||||
func->name = "bitOr";
|
{
|
||||||
|
auto last_arg = args.back();
|
||||||
|
args.pop_back();
|
||||||
|
|
||||||
|
ASTPtr new_func;
|
||||||
|
if (args.size() > 1)
|
||||||
|
new_func = makeASTFunction(
|
||||||
|
"bitOr",
|
||||||
|
node,
|
||||||
|
last_arg);
|
||||||
|
else
|
||||||
|
new_func = makeASTFunction(
|
||||||
|
"bitOr",
|
||||||
|
args.back(),
|
||||||
|
last_arg);
|
||||||
|
|
||||||
|
node = new_func;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool checkAtomName(const String & name)
|
static bool checkAtomName(const String & name)
|
||||||
{
|
{
|
||||||
static std::set<String> atoms = {
|
static std::set<String> atoms = {
|
||||||
"notEquals",
|
"notEquals",
|
||||||
@ -302,7 +312,7 @@ bool checkAtomName(const String & name)
|
|||||||
return atoms.find(name) != atoms.end();
|
return atoms.find(name) != atoms.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic) const
|
bool SetIndexCondition::checkASTUseless(const ASTPtr &node, bool atomic) const
|
||||||
{
|
{
|
||||||
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
|
if (const auto * func = typeid_cast<const ASTFunction *>(node.get()))
|
||||||
{
|
{
|
||||||
@ -312,16 +322,16 @@ bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atom
|
|||||||
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
|
const ASTs & args = typeid_cast<const ASTExpressionList &>(*func->arguments).children;
|
||||||
|
|
||||||
if (func->name == "and" || func->name == "indexHint")
|
if (func->name == "and" || func->name == "indexHint")
|
||||||
return checkASTAlwaysUnknownOrTrue(args[0], atomic) && checkASTAlwaysUnknownOrTrue(args[1], atomic);
|
return checkASTUseless(args[0], atomic) && checkASTUseless(args[1], atomic);
|
||||||
else if (func->name == "or")
|
else if (func->name == "or")
|
||||||
return checkASTAlwaysUnknownOrTrue(args[0], atomic) || checkASTAlwaysUnknownOrTrue(args[1], atomic);
|
return checkASTUseless(args[0], atomic) || checkASTUseless(args[1], atomic);
|
||||||
else if (func->name == "not")
|
else if (func->name == "not")
|
||||||
return checkASTAlwaysUnknownOrTrue(args[0], atomic);
|
return checkASTUseless(args[0], atomic);
|
||||||
else if (!atomic && checkAtomName(func->name))
|
else if (!atomic && checkAtomName(func->name))
|
||||||
return checkASTAlwaysUnknownOrTrue(node, true);
|
return checkASTUseless(node, true);
|
||||||
else
|
else
|
||||||
return std::any_of(args.begin(), args.end(),
|
return std::any_of(args.begin(), args.end(),
|
||||||
[this, &atomic](const auto & arg) { return checkASTAlwaysUnknownOrTrue(arg, atomic); });
|
[this, &atomic](const auto & arg) { return checkASTUseless(arg, atomic); });
|
||||||
}
|
}
|
||||||
else if (const auto * literal = typeid_cast<const ASTLiteral *>(node.get()))
|
else if (const auto * literal = typeid_cast<const ASTLiteral *>(node.get()))
|
||||||
return !atomic && literal->value.get<bool>();
|
return !atomic && literal->value.get<bool>();
|
||||||
@ -332,19 +342,19 @@ bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atom
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const
|
MergeTreeIndexGranulePtr MergeTreeSetSkippingIndex::createIndexGranule() const
|
||||||
{
|
{
|
||||||
return std::make_shared<MergeTreeUniqueGranule>(*this);
|
return std::make_shared<MergeTreeSetIndexGranule>(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexConditionPtr MergeTreeUniqueIndex::createIndexCondition(
|
IndexConditionPtr MergeTreeSetSkippingIndex::createIndexCondition(
|
||||||
const SelectQueryInfo & query, const Context & context) const
|
const SelectQueryInfo & query, const Context & context) const
|
||||||
{
|
{
|
||||||
return std::make_shared<UniqueCondition>(query, context, *this);
|
return std::make_shared<SetIndexCondition>(query, context, *this);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> MergeTreeUniqueIndexCreator(
|
std::unique_ptr<IMergeTreeIndex> setIndexCreator(
|
||||||
const NamesAndTypesList & new_columns,
|
const NamesAndTypesList & new_columns,
|
||||||
std::shared_ptr<ASTIndexDeclaration> node,
|
std::shared_ptr<ASTIndexDeclaration> node,
|
||||||
const Context & context)
|
const Context & context)
|
||||||
@ -386,8 +396,8 @@ std::unique_ptr<MergeTreeIndex> MergeTreeUniqueIndexCreator(
|
|||||||
header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name));
|
header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name));
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::make_unique<MergeTreeUniqueIndex>(
|
return std::make_unique<MergeTreeSetSkippingIndex>(
|
||||||
node->name, std::move(unique_expr), columns, data_types, header, node->granularity.get<size_t>(), max_rows);
|
node->name, std::move(unique_expr), columns, data_types, header, node->granularity, max_rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -12,50 +12,49 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
class MergeTreeUniqueIndex;
|
class MergeTreeSetSkippingIndex;
|
||||||
|
|
||||||
struct MergeTreeUniqueGranule : public MergeTreeIndexGranule
|
struct MergeTreeSetIndexGranule : public IMergeTreeIndexGranule
|
||||||
{
|
{
|
||||||
explicit MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index);
|
explicit MergeTreeSetIndexGranule(const MergeTreeSetSkippingIndex & index);
|
||||||
|
|
||||||
void serializeBinary(WriteBuffer & ostr) const override;
|
void serializeBinary(WriteBuffer & ostr) const override;
|
||||||
void deserializeBinary(ReadBuffer & istr) override;
|
void deserializeBinary(ReadBuffer & istr) override;
|
||||||
|
|
||||||
String toString() const override;
|
|
||||||
size_t size() const { return set->getTotalRowCount(); }
|
size_t size() const { return set->getTotalRowCount(); }
|
||||||
bool empty() const override { return !size(); }
|
bool empty() const override { return !size(); }
|
||||||
|
|
||||||
void update(const Block & block, size_t * pos, size_t limit) override;
|
void update(const Block & block, size_t * pos, size_t limit) override;
|
||||||
Block getElementsBlock() const;
|
Block getElementsBlock() const;
|
||||||
|
|
||||||
~MergeTreeUniqueGranule() override = default;
|
~MergeTreeSetIndexGranule() override = default;
|
||||||
|
|
||||||
const MergeTreeUniqueIndex & index;
|
const MergeTreeSetSkippingIndex & index;
|
||||||
std::unique_ptr<Set> set;
|
std::unique_ptr<Set> set;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class UniqueCondition : public IndexCondition
|
class SetIndexCondition : public IIndexCondition
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
UniqueCondition(
|
SetIndexCondition(
|
||||||
const SelectQueryInfo & query,
|
const SelectQueryInfo & query,
|
||||||
const Context & context,
|
const Context & context,
|
||||||
const MergeTreeUniqueIndex & index);
|
const MergeTreeSetSkippingIndex & index);
|
||||||
|
|
||||||
bool alwaysUnknownOrTrue() const override;
|
bool alwaysUnknownOrTrue() const override;
|
||||||
|
|
||||||
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
|
bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override;
|
||||||
|
|
||||||
~UniqueCondition() override = default;
|
~SetIndexCondition() override = default;
|
||||||
private:
|
private:
|
||||||
void traverseAST(ASTPtr & node) const;
|
void traverseAST(ASTPtr & node) const;
|
||||||
bool atomFromAST(ASTPtr & node) const;
|
bool atomFromAST(ASTPtr & node) const;
|
||||||
bool operatorFromAST(ASTPtr & node) const;
|
bool operatorFromAST(ASTPtr & node) const;
|
||||||
|
|
||||||
bool checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic = false) const;
|
bool checkASTUseless(const ASTPtr &node, bool atomic = false) const;
|
||||||
|
|
||||||
const MergeTreeUniqueIndex & index;
|
const MergeTreeSetSkippingIndex & index;
|
||||||
|
|
||||||
bool useless;
|
bool useless;
|
||||||
std::set<String> key_columns;
|
std::set<String> key_columns;
|
||||||
@ -64,10 +63,10 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
class MergeTreeUniqueIndex : public MergeTreeIndex
|
class MergeTreeSetSkippingIndex : public IMergeTreeIndex
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MergeTreeUniqueIndex(
|
MergeTreeSetSkippingIndex(
|
||||||
String name_,
|
String name_,
|
||||||
ExpressionActionsPtr expr_,
|
ExpressionActionsPtr expr_,
|
||||||
const Names & columns_,
|
const Names & columns_,
|
||||||
@ -75,9 +74,9 @@ public:
|
|||||||
const Block & header_,
|
const Block & header_,
|
||||||
size_t granularity_,
|
size_t granularity_,
|
||||||
size_t max_rows_)
|
size_t max_rows_)
|
||||||
: MergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {}
|
: IMergeTreeIndex(std::move(name_), std::move(expr_), columns_, data_types_, header_, granularity_), max_rows(max_rows_) {}
|
||||||
|
|
||||||
~MergeTreeUniqueIndex() override = default;
|
~MergeTreeSetSkippingIndex() override = default;
|
||||||
|
|
||||||
MergeTreeIndexGranulePtr createIndexGranule() const override;
|
MergeTreeIndexGranulePtr createIndexGranule() const override;
|
||||||
|
|
||||||
@ -87,7 +86,4 @@ public:
|
|||||||
size_t max_rows = 0;
|
size_t max_rows = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unique_ptr<MergeTreeIndex> MergeTreeUniqueIndexCreator(
|
|
||||||
const NamesAndTypesList & columns, std::shared_ptr<ASTIndexDeclaration> node, const Context & context);
|
|
||||||
|
|
||||||
}
|
}
|
@ -3,7 +3,7 @@
|
|||||||
#include <Storages/StorageReplicatedMergeTree.h>
|
#include <Storages/StorageReplicatedMergeTree.h>
|
||||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||||
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
|
#include <Storages/MergeTree/MergeTreeMinMaxIndex.h>
|
||||||
#include <Storages/MergeTree/MergeTreeUniqueIndex.h>
|
#include <Storages/MergeTree/MergeTreeSetSkippingIndex.h>
|
||||||
|
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <Common/OptimizedRegularExpression.h>
|
#include <Common/OptimizedRegularExpression.h>
|
||||||
@ -579,7 +579,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
|||||||
if (args.query.columns_list && args.query.columns_list->indices)
|
if (args.query.columns_list && args.query.columns_list->indices)
|
||||||
for (const auto & index : args.query.columns_list->indices->children)
|
for (const auto & index : args.query.columns_list->indices->children)
|
||||||
indices_description.indices.push_back(
|
indices_description.indices.push_back(
|
||||||
std::dynamic_pointer_cast<ASTIndexDeclaration>(index->ptr()));
|
std::dynamic_pointer_cast<ASTIndexDeclaration>(index->clone()));
|
||||||
|
|
||||||
storage_settings.loadFromQuery(*args.storage_def);
|
storage_settings.loadFromQuery(*args.storage_def);
|
||||||
}
|
}
|
||||||
@ -624,14 +624,6 @@ static StoragePtr create(const StorageFactory::Arguments & args)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void registerMergeTreeSkipIndices()
|
|
||||||
{
|
|
||||||
auto & factory = MergeTreeIndexFactory::instance();
|
|
||||||
factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator);
|
|
||||||
factory.registerIndex("unique", MergeTreeUniqueIndexCreator);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void registerStorageMergeTree(StorageFactory & factory)
|
void registerStorageMergeTree(StorageFactory & factory)
|
||||||
{
|
{
|
||||||
factory.registerStorage("MergeTree", create);
|
factory.registerStorage("MergeTree", create);
|
||||||
@ -649,8 +641,6 @@ void registerStorageMergeTree(StorageFactory & factory)
|
|||||||
factory.registerStorage("ReplicatedSummingMergeTree", create);
|
factory.registerStorage("ReplicatedSummingMergeTree", create);
|
||||||
factory.registerStorage("ReplicatedGraphiteMergeTree", create);
|
factory.registerStorage("ReplicatedGraphiteMergeTree", create);
|
||||||
factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create);
|
factory.registerStorage("ReplicatedVersionedCollapsingMergeTree", create);
|
||||||
|
|
||||||
registerMergeTreeSkipIndices();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -113,7 +113,6 @@ namespace ErrorCodes
|
|||||||
extern const int KEEPER_EXCEPTION;
|
extern const int KEEPER_EXCEPTION;
|
||||||
extern const int ALL_REPLICAS_LOST;
|
extern const int ALL_REPLICAS_LOST;
|
||||||
extern const int REPLICA_STATUS_CHANGED;
|
extern const int REPLICA_STATUS_CHANGED;
|
||||||
extern const int INCORRECT_QUERY;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace ActionLocks
|
namespace ActionLocks
|
||||||
|
@ -1,8 +0,0 @@
|
|||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
@ -1,41 +0,0 @@
|
|||||||
DROP TABLE IF EXISTS test.minmax_idx;
|
|
||||||
|
|
||||||
CREATE TABLE test.minmax_idx
|
|
||||||
(
|
|
||||||
u64 UInt64,
|
|
||||||
i32 Int32,
|
|
||||||
f64 Float64,
|
|
||||||
d Decimal(10, 2),
|
|
||||||
s String,
|
|
||||||
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
|
||||||
dt Date,
|
|
||||||
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4,
|
|
||||||
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
|
|
||||||
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
|
||||||
) ENGINE = MergeTree()
|
|
||||||
ORDER BY u64
|
|
||||||
SETTINGS index_granularity = 2;
|
|
||||||
|
|
||||||
|
|
||||||
/* many small inserts => table will make merges */
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
|
|
||||||
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
|
|
||||||
INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
|
|
||||||
|
|
||||||
/* simple select */
|
|
||||||
SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
|
||||||
|
|
||||||
/* select with hole made by primary key */
|
|
||||||
SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt;
|
|
||||||
|
|
||||||
DROP TABLE test.minmax_idx;
|
|
@ -1,16 +0,0 @@
|
|||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
@ -1,8 +0,0 @@
|
|||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
||||||
0 5 4.7 6.50 cba b 2014-01-04
|
|
||||||
0 5 4.7 6.50 cba b 2014-03-11
|
|
||||||
2 5 4.7 6.50 cba b 2014-06-11
|
|
||||||
2 5 4.7 6.50 cba b 2015-01-01
|
|
@ -1,41 +0,0 @@
|
|||||||
DROP TABLE IF EXISTS test.unique_idx;
|
|
||||||
|
|
||||||
CREATE TABLE test.unique_idx
|
|
||||||
(
|
|
||||||
u64 UInt64,
|
|
||||||
i32 Int32,
|
|
||||||
f64 Float64,
|
|
||||||
d Decimal(10, 2),
|
|
||||||
s String,
|
|
||||||
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
|
||||||
dt Date,
|
|
||||||
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 4,
|
|
||||||
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 2,
|
|
||||||
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique GRANULARITY 3
|
|
||||||
) ENGINE = MergeTree()
|
|
||||||
ORDER BY u64
|
|
||||||
SETTINGS index_granularity = 2;
|
|
||||||
|
|
||||||
|
|
||||||
/* many small inserts => table will make merges */
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
|
|
||||||
INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
|
|
||||||
INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
|
|
||||||
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
|
|
||||||
INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
|
|
||||||
INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
|
|
||||||
INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
|
|
||||||
|
|
||||||
/* simple select */
|
|
||||||
SELECT * FROM test.unique_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
|
||||||
|
|
||||||
/* select with hole made by primary key */
|
|
||||||
SELECT * FROM test.unique_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt;
|
|
||||||
|
|
||||||
DROP TABLE test.unique_idx;
|
|
@ -0,0 +1,6 @@
|
|||||||
|
0 5 4.7 6.50 cba b 2014-01-04
|
||||||
|
1 5 4.7 6.50 cba b 2014-03-11
|
||||||
|
11 5 4.7 6.50 cba b 2014-06-11
|
||||||
|
12 5 4.7 6.50 cba b 2015-01-01
|
||||||
|
"rows_read": 4,
|
||||||
|
"rows_read": 2,
|
47
dbms/tests/queries/0_stateless/00837_minmax_index.sh
Executable file
47
dbms/tests/queries/0_stateless/00837_minmax_index.sh
Executable file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
. $CURDIR/../shell_config.sh
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.minmax_idx
|
||||||
|
(
|
||||||
|
u64 UInt64,
|
||||||
|
i32 Int32,
|
||||||
|
f64 Float64,
|
||||||
|
d Decimal(10, 2),
|
||||||
|
s String,
|
||||||
|
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
||||||
|
dt Date,
|
||||||
|
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 1,
|
||||||
|
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
|
||||||
|
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||||
|
) ENGINE = MergeTree()
|
||||||
|
ORDER BY u64
|
||||||
|
SETTINGS index_granularity = 2"
|
||||||
|
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="INSERT INTO test.minmax_idx VALUES
|
||||||
|
(0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'),
|
||||||
|
(1, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'),
|
||||||
|
(2, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'),
|
||||||
|
(3, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'),
|
||||||
|
(4, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'),
|
||||||
|
(5, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'),
|
||||||
|
(6, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'),
|
||||||
|
(7, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'),
|
||||||
|
(8, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'),
|
||||||
|
(9, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'),
|
||||||
|
(11, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'),
|
||||||
|
(12, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01')"
|
||||||
|
|
||||||
|
# simple select
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# select with hole made by primary key
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.minmax_idx WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.minmax_idx WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE test.minmax_idx"
|
@ -0,0 +1,8 @@
|
|||||||
|
0 5 4.7 6.50 cba b 2014-01-04
|
||||||
|
1 5 4.7 6.50 cba b 2014-03-11
|
||||||
|
12 5 4.7 6.50 cba b 2014-06-11
|
||||||
|
13 5 4.7 6.50 cba b 2015-01-01
|
||||||
|
0 5 4.7 6.50 cba b 2014-01-04
|
||||||
|
1 5 4.7 6.50 cba b 2014-03-11
|
||||||
|
12 5 4.7 6.50 cba b 2014-06-11
|
||||||
|
13 5 4.7 6.50 cba b 2015-01-01
|
@ -11,7 +11,7 @@ CREATE TABLE test.minmax_idx1
|
|||||||
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
||||||
dt Date,
|
dt Date,
|
||||||
INDEX
|
INDEX
|
||||||
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
|
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 1,
|
||||||
INDEX
|
INDEX
|
||||||
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1')
|
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r1')
|
||||||
@ -28,7 +28,7 @@ CREATE TABLE test.minmax_idx2
|
|||||||
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
||||||
dt Date,
|
dt Date,
|
||||||
INDEX
|
INDEX
|
||||||
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2,
|
idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 1,
|
||||||
INDEX
|
INDEX
|
||||||
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3
|
||||||
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2')
|
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/minmax', 'r2')
|
||||||
@ -37,33 +37,36 @@ SETTINGS index_granularity = 2;
|
|||||||
|
|
||||||
|
|
||||||
/* many small inserts => table will make merges */
|
/* many small inserts => table will make merges */
|
||||||
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
|
INSERT INTO test.minmax_idx1 VALUES (2, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01');
|
||||||
INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
|
INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04');
|
||||||
INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
|
INSERT INTO test.minmax_idx2 VALUES (3, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01');
|
||||||
INSERT INTO test.minmax_idx2 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
|
INSERT INTO test.minmax_idx2 VALUES (4, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01');
|
||||||
INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
|
INSERT INTO test.minmax_idx2 VALUES (13, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01');
|
||||||
INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
|
INSERT INTO test.minmax_idx1 VALUES (5, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11');
|
||||||
|
|
||||||
SYSTEM SYNC REPLICA test.minmax_idx1;
|
SYSTEM SYNC REPLICA test.minmax_idx1;
|
||||||
SYSTEM SYNC REPLICA test.minmax_idx2;
|
SYSTEM SYNC REPLICA test.minmax_idx2;
|
||||||
|
|
||||||
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
|
INSERT INTO test.minmax_idx1 VALUES (6, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11');
|
||||||
INSERT INTO test.minmax_idx1 VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
|
INSERT INTO test.minmax_idx1 VALUES (1, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11');
|
||||||
INSERT INTO test.minmax_idx1 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
|
INSERT INTO test.minmax_idx1 VALUES (7, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11');
|
||||||
INSERT INTO test.minmax_idx1 VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
|
INSERT INTO test.minmax_idx1 VALUES (8, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11');
|
||||||
INSERT INTO test.minmax_idx2 VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
|
INSERT INTO test.minmax_idx2 VALUES (12, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11');
|
||||||
INSERT INTO test.minmax_idx2 VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
|
INSERT INTO test.minmax_idx2 VALUES (9, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11');
|
||||||
|
|
||||||
SYSTEM SYNC REPLICA test.minmax_idx1;
|
SYSTEM SYNC REPLICA test.minmax_idx1;
|
||||||
SYSTEM SYNC REPLICA test.minmax_idx2;
|
SYSTEM SYNC REPLICA test.minmax_idx2;
|
||||||
|
|
||||||
|
OPTIMIZE TABLE test.minmax_idx1;
|
||||||
|
OPTIMIZE TABLE test.minmax_idx2;
|
||||||
|
|
||||||
/* simple select */
|
/* simple select */
|
||||||
SELECT * FROM test.minmax_idx1 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
SELECT * FROM test.minmax_idx1 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
||||||
SELECT * FROM test.minmax_idx2 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
SELECT * FROM test.minmax_idx2 WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt;
|
||||||
|
|
||||||
/* select with hole made by primary key */
|
/* select with hole made by primary key */
|
||||||
SELECT * FROM test.minmax_idx1 WHERE u64 != 1 AND e = 'b' ORDER BY dt;
|
SELECT * FROM test.minmax_idx1 WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt;
|
||||||
SELECT * FROM test.minmax_idx2 WHERE u64 != 1 AND e = 'b' ORDER BY dt;
|
SELECT * FROM test.minmax_idx2 WHERE (u64 < 2 OR u64 > 10) AND e != 'b' ORDER BY dt;
|
||||||
|
|
||||||
DROP TABLE test.minmax_idx1;
|
DROP TABLE test.minmax_idx1;
|
||||||
DROP TABLE test.minmax_idx2;
|
DROP TABLE test.minmax_idx2;
|
@ -0,0 +1,6 @@
|
|||||||
|
0 5 4.7 6.50 cba b 2014-01-04
|
||||||
|
1 5 4.7 6.50 cba b 2014-03-11
|
||||||
|
12 5 4.7 6.50 cba b 2014-06-11
|
||||||
|
13 5 4.7 6.50 cba b 2015-01-01
|
||||||
|
"rows_read": 4,
|
||||||
|
"rows_read": 2,
|
46
dbms/tests/queries/0_stateless/00838_unique_index.sh
Executable file
46
dbms/tests/queries/0_stateless/00838_unique_index.sh
Executable file
@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||||
|
. $CURDIR/../shell_config.sh
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.set_idx;"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.set_idx
|
||||||
|
(
|
||||||
|
u64 UInt64,
|
||||||
|
i32 Int32,
|
||||||
|
f64 Float64,
|
||||||
|
d Decimal(10, 2),
|
||||||
|
s String,
|
||||||
|
e Enum8('a' = 1, 'b' = 2, 'c' = 3),
|
||||||
|
dt Date,
|
||||||
|
INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE set GRANULARITY 1,
|
||||||
|
INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE set GRANULARITY 2,
|
||||||
|
INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE set GRANULARITY 3
|
||||||
|
) ENGINE = MergeTree()
|
||||||
|
ORDER BY u64
|
||||||
|
SETTINGS index_granularity = 2;"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="INSERT INTO test.set_idx VALUES
|
||||||
|
(0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'),
|
||||||
|
(1, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'),
|
||||||
|
(2, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'),
|
||||||
|
(3, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'),
|
||||||
|
(4, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'),
|
||||||
|
(5, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'),
|
||||||
|
(6, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'),
|
||||||
|
(7, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'),
|
||||||
|
(8, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'),
|
||||||
|
(9, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'),
|
||||||
|
(12, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'),
|
||||||
|
(13, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01')"
|
||||||
|
|
||||||
|
# simple select
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.set_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.set_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
# select with hole made by primary key
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.set_idx WHERE (u64 < 2 OR u64 > 10) AND s != 'cba' ORDER BY dt"
|
||||||
|
$CLICKHOUSE_CLIENT --query="SELECT * FROM test.set_idx WHERE (u64 < 2 OR u64 > 10) AND s != 'cba' ORDER BY dt FORMAT JSON" | grep "rows_read"
|
||||||
|
|
||||||
|
$CLICKHOUSE_CLIENT --query="DROP TABLE test.set_idx;"
|
@ -250,7 +250,7 @@ CREATE TABLE table_name
|
|||||||
s String,
|
s String,
|
||||||
...
|
...
|
||||||
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
|
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
|
||||||
INDEX b (u64 * length(s)) TYPE unique GRANULARITY 4
|
INDEX b (u64 * length(s)) TYPE set GRANULARITY 4
|
||||||
) ENGINE = MergeTree()
|
) ENGINE = MergeTree()
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
@ -266,14 +266,14 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
|
|||||||
* `minmax`
|
* `minmax`
|
||||||
Stores the extremes of the specified expression (if the expression is a `tuple`, it stores the extremes for each element of the `tuple`) and uses the stored info to skip blocks of data, like the primary key.
|
Stores the extremes of the specified expression (if the expression is a `tuple`, it stores the extremes for each element of the `tuple`) and uses the stored info to skip blocks of data, like the primary key.
|
||||||
|
|
||||||
* `unique(max_rows)`
|
* `set(max_rows)`
|
||||||
Stores unique values of the specified expression (no more than `max_rows` rows) and uses them to check whether the `WHERE` expression is unsatisfiable on a block of data.
|
Stores unique values of the specified expression (no more than `max_rows` rows) and uses them to check whether the `WHERE` expression is unsatisfiable on a block of data.
|
||||||
If `max_rows=0`, then there are no limits for storing values. `unique` without parameters is equal to `unique(0)`.
|
If `max_rows=0`, then there are no limits for storing values. `set` without parameters is equal to `set(0)`.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
|
INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4
|
||||||
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4
|
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set GRANULARITY 4
|
||||||
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4
|
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -96,9 +96,9 @@ It only works for tables in the [`*MergeTree`](../operations/table_engines/merge
|
|||||||
[replicated](../operations/table_engines/replication.md) tables). The following operations
|
[replicated](../operations/table_engines/replication.md) tables). The following operations
|
||||||
are available:
|
are available:
|
||||||
|
|
||||||
* `ALTER ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds the index description to the table's metadata.
|
* `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds the index description to the table's metadata.
|
||||||
|
|
||||||
* `ALTER DROP INDEX name` - Removes the index description from the table's metadata and deletes the index files from disk.
|
* `ALTER TABLE [db].name DROP INDEX name` - Removes the index description from the table's metadata and deletes the index files from disk.
|
||||||
|
|
||||||
These commands are lightweight in the sense that they only change metadata or remove files.
|
These commands are lightweight in the sense that they only change metadata or remove files.
|
||||||
Also, they are replicated (syncing indices metadata through ZooKeeper).
|
Also, they are replicated (syncing indices metadata through ZooKeeper).
|
||||||
|
@ -241,7 +241,7 @@ CREATE TABLE table_name
|
|||||||
s String,
|
s String,
|
||||||
...
|
...
|
||||||
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
|
INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3,
|
||||||
INDEX b (u64 * length(s), i32) TYPE unique GRANULARITY 4
|
INDEX b (u64 * length(s), i32) TYPE set GRANULARITY 4
|
||||||
) ENGINE = MergeTree()
|
) ENGINE = MergeTree()
|
||||||
...
|
...
|
||||||
```
|
```
|
||||||
@ -257,7 +257,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
|
|||||||
* `minmax`
|
* `minmax`
|
||||||
Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу.
|
Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу.
|
||||||
|
|
||||||
* `unique(max_rows)`
|
* `set(max_rows)`
|
||||||
Хранит уникальные значения выражения на блоке в количестве не более `max_rows`, используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных.
|
Хранит уникальные значения выражения на блоке в количестве не более `max_rows`, используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных.
|
||||||
Если `max_rows=0`, то хранит значения выражения без ограничений. Если параметров не передано, то полагается `max_rows=0`.
|
Если `max_rows=0`, то хранит значения выражения без ограничений. Если параметров не передано, то полагается `max_rows=0`.
|
||||||
|
|
||||||
@ -265,8 +265,8 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
|
|||||||
Примеры
|
Примеры
|
||||||
```sql
|
```sql
|
||||||
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4
|
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4
|
||||||
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4
|
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set GRANULARITY 4
|
||||||
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4
|
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -83,12 +83,12 @@ MODIFY ORDER BY new_expression
|
|||||||
|
|
||||||
Добавить или удалить индекс можно с помощью операций
|
Добавить или удалить индекс можно с помощью операций
|
||||||
```
|
```
|
||||||
ALTER ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
|
ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name]
|
||||||
ALTER DROP INDEX name
|
ALTER TABLE [db].name DROP INDEX name
|
||||||
```
|
```
|
||||||
Поддерживается только таблицами семейства `*MergeTree`.
|
Поддерживается только таблицами семейства `*MergeTree`.
|
||||||
|
|
||||||
Команда `ALTER ADD INDEX` добавляет описание индексов в метаданные, а `ALTER DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому они легковесные и работают мгновенно.
|
Команда `ADD INDEX` добавляет описание индексов в метаданные, а `DROP INDEX` удаляет индекс из метаданных и стирает файлы индекса с диска, поэтому они легковесные и работают мгновенно.
|
||||||
|
|
||||||
Если индекс появился в метаданных, то он начнет считаться в последующих слияниях и записях в таблицу, а не сразу после выполнения операции `ALTER`.
|
Если индекс появился в метаданных, то он начнет считаться в последующих слияниях и записях в таблицу, а не сразу после выполнения операции `ALTER`.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user