From 6b6b0aa79a48791d35cc4a5508f5e7c31335eeab Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 26 Jan 2019 16:12:13 +0300 Subject: [PATCH 01/20] unique idx --- .../MergeTree/MergeTreeUniqueIndex.cpp | 116 ++++++++++++++++++ .../Storages/MergeTree/MergeTreeUniqueIndex.h | 75 +++++++++++ 2 files changed, 191 insertions(+) create mode 100644 dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp create mode 100644 dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp new file mode 100644 index 00000000000..f0cee127b1b --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -0,0 +1,116 @@ +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int INCORRECT_QUERY; +} + +MergeTreeUniqueGranule::MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index) + : MergeTreeIndexGranule(), index(index), block() +{ +} + +void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const +{ + if (empty()) + throw Exception( + "Attempt to write empty unique index `" + index.name + "`", ErrorCodes::LOGICAL_ERROR); + Poco::Logger * log = &Poco::Logger::get("unique_idx"); + + LOG_DEBUG(log, "serializeBinary Granule"); + + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + type->serializeBinary(block.getByPosition(i).column, ostr); + } +} + +void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr) +{ + Poco::Logger * log = &Poco::Logger::get("unique_idx"); + + LOG_DEBUG(log, "deserializeBinary Granule"); + block.clear(); + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + + auto new_column = type->createColumn(); + type->deserializeBinary(*new_column, istr); + + block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i])); + } +} + +String 
MergeTreeUniqueGranule::toString() const +{ + String res = "unique granule:\n"; + + for (size_t i = 0; i < block.columns(); ++i) + { + const auto & column = block.getByPosition(i); + res += column.name; + res += " ["; + for (size_t j = 0; j < column.column->size(); ++j) + { + if (j != 0) + res += ", "; + Field field; + column.column->get(j, field); + res += applyVisitor(FieldVisitorToString(), field); + } + res += "]\n"; + } + + return res; +} + +void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_t limit) +{ + Poco::Logger * log = &Poco::Logger::get("unique_idx"); + + LOG_DEBUG(log, "update Granule " << new_block.columns() + << " pos: "<< *pos << " limit: " << limit << " rows: " << new_block.rows()); + + size_t cur = 0; + size_t block_size = new_block.getByPosition(0).column->size(); + + if (!block.columns()) + { + for (size_t i = 0; i < index.columns.size(); ++i) + { + const DataTypePtr & type = index.data_types[i]; + block.insert(ColumnWithTypeAndName(type->createColumn(), type, index.columns[i])); + } + } + + for (cur = 0; cur < limit && cur + *pos < block_size; ++cur) + { + Field field; + column->get(cur + *pos, field); + LOG_DEBUG(log, "upd:: " << applyVisitor(FieldVisitorToString(), field)); + if (parallelogram.size() <= i) + { + LOG_DEBUG(log, "emplaced"); + parallelogram.emplace_back(field, true, field, true); + } + else + { + parallelogram[i].left = std::min(parallelogram[i].left, field); + parallelogram[i].right = std::max(parallelogram[i].right, field); + } + } + *pos += cur; + + LOG_DEBUG(log, "updated rows_read: " << rows_read); + +}; + +} \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h new file mode 100644 index 00000000000..2b8fff3f810 --- /dev/null +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include +#include + +#include + + +namespace DB +{ + +class 
MergeTreeUniqueIndex; + +struct MergeTreeUniqueGranule : public MergeTreeIndexGranule +{ + explicit MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index); + + void serializeBinary(WriteBuffer & ostr) const override; + void deserializeBinary(ReadBuffer & istr) override; + + String toString() const override; + bool empty() const override { return !block.rows(); } + + void update(const Block & block, size_t * pos, size_t limit) override; + + ~MergeTreeUniqueGranule() override = default; + + const MergeTreeUniqueIndex & index; + Block block; +}; + +class UniqueCondition : public IndexCondition +{ +public: + UniqueCondition( + const SelectQueryInfo & query, + const Context & context, + const MergeTreeUniqueIndex & index); + + bool alwaysUnknownOrTrue() const override; + + bool mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const override; + + ~UniqueCondition() override = default; +private: + const MergeTreeUniqueIndex & index; + KeyCondition condition; +}; + + +class MergeTreeUniqueIndex : public MergeTreeIndex +{ +public: + MergeTreeUniqueIndex( + String name, + ExpressionActionsPtr expr, + const Names & columns, + const DataTypes & data_types, + size_t granularity) + : MergeTreeIndex(name, expr, columns, data_types, granularity) {} + + ~MergeTreeUniqueIndex() override = default; + + MergeTreeIndexGranulePtr createIndexGranule() const override; + + IndexConditionPtr createIndexCondition( + const SelectQueryInfo & query, const Context & context) const override; + +}; + +std::unique_ptr MergeTreeUniqueIndexCreator( + const MergeTreeData & data, std::shared_ptr node, const Context & context); + +} \ No newline at end of file From 387ed1b7c303c4566ea274b4299f3cc1efa28c26 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sat, 26 Jan 2019 18:45:38 +0300 Subject: [PATCH 02/20] unique --- .../MergingSortedBlockInputStream.h | 2 +- .../MergeTree/MergeTreeUniqueIndex.cpp | 111 ++++++++++++++---- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 8 +- 
.../MergeTree/registerStorageMergeTree.cpp | 2 + 4 files changed, 97 insertions(+), 26 deletions(-) diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.h b/dbms/src/DataStreams/MergingSortedBlockInputStream.h index 6521ef383ed..63d0ee9b2cc 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.h @@ -157,7 +157,7 @@ protected: using QueueWithCollation = std::priority_queue; QueueWithCollation queue_with_collation; - /// Used in Vertical merge algorithm to gather non-PK columns (on next step) + /// Used in Vertical merge algorithm to gather non-PK/non-index columns (on next step) /// If it is not nullptr then it should be populated during execution WriteBuffer * out_row_sources_buf; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index f0cee127b1b..c32496ef724 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -1,5 +1,11 @@ #include +#include +#include +#include +#include + +#include namespace DB { @@ -28,7 +34,7 @@ void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const { const DataTypePtr & type = index.data_types[i]; - type->serializeBinary(block.getByPosition(i).column, ostr); + type->serializeBinaryBulk(*block.getByPosition(i).column, ostr, 0, 0); } } @@ -43,7 +49,7 @@ void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr) const DataTypePtr & type = index.data_types[i]; auto new_column = type->createColumn(); - type->deserializeBinary(*new_column, istr); + type->deserializeBinaryBulk(*new_column, istr, 0, 0); block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i])); } @@ -79,7 +85,6 @@ void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_ LOG_DEBUG(log, "update Granule " << new_block.columns() << " pos: "<< *pos << " limit: " << limit << " rows: " << 
new_block.rows()); - size_t cur = 0; size_t block_size = new_block.getByPosition(0).column->size(); if (!block.columns()) @@ -91,26 +96,90 @@ void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_ } } - for (cur = 0; cur < limit && cur + *pos < block_size; ++cur) + for (size_t cur = 0; cur < limit && cur + *pos < block_size; ++cur) { - Field field; - column->get(cur + *pos, field); - LOG_DEBUG(log, "upd:: " << applyVisitor(FieldVisitorToString(), field)); - if (parallelogram.size() <= i) - { - LOG_DEBUG(log, "emplaced"); - parallelogram.emplace_back(field, true, field, true); - } - else - { - parallelogram[i].left = std::min(parallelogram[i].left, field); - parallelogram[i].right = std::max(parallelogram[i].right, field); - } + // TODO + ++(*pos); } - *pos += cur; - - LOG_DEBUG(log, "updated rows_read: " << rows_read); - }; +UniqueCondition::UniqueCondition( + const SelectQueryInfo &, + const Context &, + const MergeTreeUniqueIndex &index) + : IndexCondition(), index(index) {}; + +bool UniqueCondition::alwaysUnknownOrTrue() const +{ + return true; +} + +bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const +{ + auto granule = std::dynamic_pointer_cast(idx_granule); + if (!granule) + throw Exception( + "Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); + + return true; +} + + +MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const +{ + return std::make_shared(*this); +} + +IndexConditionPtr MergeTreeUniqueIndex::createIndexCondition( + const SelectQueryInfo & query, const Context & context) const +{ + return std::make_shared(query, context, *this); +}; + + +std::unique_ptr MergeTreeUniqueIndexCreator( + const MergeTreeData & data, + std::shared_ptr node, + const Context & context) +{ + if (node->name.empty()) + throw Exception("Index must have unique name", ErrorCodes::INCORRECT_QUERY); + + size_t max_rows = 0; + if (node->type->arguments) + { + if 
(node->type->arguments->children.size() > 1) + throw Exception("Unique index cannot have only 0 or 1 argument", ErrorCodes::INCORRECT_QUERY); + else if (node->type->arguments->children.size() == 1) + max_rows = typeid_cast( + *node->type->arguments->children[0]).value.get(); + } + + + ASTPtr expr_list = MergeTreeData::extractKeyExpressionList(node->expr->clone()); + auto syntax = SyntaxAnalyzer(context, {}).analyze( + expr_list, data.getColumns().getAllPhysical()); + auto unique_expr = ExpressionAnalyzer(expr_list, syntax, context).getActions(false); + + auto sample = ExpressionAnalyzer(expr_list, syntax, context) + .getActions(true)->getSampleBlock(); + + Names columns; + DataTypes data_types; + + Poco::Logger * log = &Poco::Logger::get("unique_idx"); + LOG_DEBUG(log, "new unique index" << node->name); + for (size_t i = 0; i < expr_list->children.size(); ++i) + { + const auto & column = sample.getByPosition(i); + + columns.emplace_back(column.name); + data_types.emplace_back(column.type); + LOG_DEBUG(log, ">" << column.name << " " << column.type->getName()); + } + + return std::make_unique( + node->name, std::move(unique_expr), columns, data_types, node->granularity.get(), max_rows);; +} + } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 2b8fff3f810..01d782cd39a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -2,7 +2,6 @@ #include #include -#include #include @@ -45,7 +44,6 @@ public: ~UniqueCondition() override = default; private: const MergeTreeUniqueIndex & index; - KeyCondition condition; }; @@ -57,8 +55,9 @@ public: ExpressionActionsPtr expr, const Names & columns, const DataTypes & data_types, - size_t granularity) - : MergeTreeIndex(name, expr, columns, data_types, granularity) {} + size_t granularity, + size_t _max_rows) + : MergeTreeIndex(name, expr, columns, data_types, 
granularity), max_rows(_max_rows) {} ~MergeTreeUniqueIndex() override = default; @@ -67,6 +66,7 @@ public: IndexConditionPtr createIndexCondition( const SelectQueryInfo & query, const Context & context) const override; + size_t max_rows = 0; }; std::unique_ptr MergeTreeUniqueIndexCreator( diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index ebaba506864..ed0a859e495 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -627,6 +628,7 @@ static void registerMergeTreeSkipIndices() { auto & factory = MergeTreeIndexFactory::instance(); factory.registerIndex("minmax", MergeTreeMinMaxIndexCreator); + factory.registerIndex("unique", MergeTreeUniqueIndexCreator); } From 537b23ef15dac0ede27972660ced6cbcb82656e6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 27 Jan 2019 21:02:15 +0300 Subject: [PATCH 03/20] upd --- .../src/Storages/MergeTree/MergeTreeIndices.h | 3 + .../MergeTree/MergeTreeMinMaxIndex.cpp | 2 +- .../Storages/MergeTree/MergeTreeMinMaxIndex.h | 3 +- .../MergeTree/MergeTreeUniqueIndex.cpp | 81 +++++++++++++------ .../Storages/MergeTree/MergeTreeUniqueIndex.h | 10 ++- 5 files changed, 70 insertions(+), 29 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndices.h b/dbms/src/Storages/MergeTree/MergeTreeIndices.h index 7c7e83a743b..f47ba6a9121 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndices.h +++ b/dbms/src/Storages/MergeTree/MergeTreeIndices.h @@ -64,11 +64,13 @@ public: ExpressionActionsPtr expr, const Names & columns, const DataTypes & data_types, + const Block & header, size_t granularity) : name(name) , expr(expr) , columns(columns) , data_types(data_types) + , header(header) , granularity(granularity) {} virtual ~MergeTreeIndex() = default; @@ -85,6 +87,7 @@ public: ExpressionActionsPtr expr; 
Names columns; DataTypes data_types; + Block header; size_t granularity; }; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index 78195c0dea9..1e0d874f1b7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -188,7 +188,7 @@ std::unique_ptr MergeTreeMinMaxIndexCreator( } return std::make_unique( - node->name, std::move(minmax_expr), columns, data_types, node->granularity.get());; + node->name, std::move(minmax_expr), columns, data_types, sample, node->granularity.get());; } } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 9ccadaf1f0e..6eb1bc5a762 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -57,8 +57,9 @@ public: ExpressionActionsPtr expr, const Names & columns, const DataTypes & data_types, + const Block & header, size_t granularity) - : MergeTreeIndex(name, expr, columns, data_types, granularity) {} + : MergeTreeIndex(name, expr, columns, data_types, header, granularity) {} ~MergeTreeMinMaxIndex() override = default; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index c32496ef724..f5811bbd42f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -17,8 +17,9 @@ namespace ErrorCodes } MergeTreeUniqueGranule::MergeTreeUniqueGranule(const MergeTreeUniqueIndex & index) - : MergeTreeIndexGranule(), index(index), block() + : MergeTreeIndexGranule(), index(index), set(new Set(SizeLimits{}, true)) { + set->setHeader(index.header); } void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const @@ -30,11 +31,16 @@ void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const 
LOG_DEBUG(log, "serializeBinary Granule"); + const auto & columns = set->getSetElements(); + + const auto & size_type = DataTypePtr(std::make_shared()); + size_type->serializeBinary(size(), ostr); + for (size_t i = 0; i < index.columns.size(); ++i) { - const DataTypePtr & type = index.data_types[i]; + const auto & type = index.data_types[i]; - type->serializeBinaryBulk(*block.getByPosition(i).column, ostr, 0, 0); + type->serializeBinaryBulk(*columns[i], ostr, 0, size()); } } @@ -43,33 +49,48 @@ void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr) Poco::Logger * log = &Poco::Logger::get("unique_idx"); LOG_DEBUG(log, "deserializeBinary Granule"); - block.clear(); + if (!set->empty()) + { + auto new_set = std::make_unique(SizeLimits{}, true); + set.swap(new_set); + } + + Block block; + + Field field_rows; + const auto & size_type = DataTypePtr(std::make_shared()); + size_type->deserializeBinary(field_rows, istr); + size_t rows_to_read = field_rows.get(); + for (size_t i = 0; i < index.columns.size(); ++i) { - const DataTypePtr & type = index.data_types[i]; + const auto & type = index.data_types[i]; auto new_column = type->createColumn(); - type->deserializeBinaryBulk(*new_column, istr, 0, 0); + type->deserializeBinaryBulk(*new_column, istr, rows_to_read, 0); block.insert(ColumnWithTypeAndName(new_column->getPtr(), type, index.columns[i])); } + + set->insertFromBlock(block); } String MergeTreeUniqueGranule::toString() const { String res = "unique granule:\n"; - for (size_t i = 0; i < block.columns(); ++i) + const auto & columns = set->getSetElements(); + + for (size_t i = 0; i < index.columns.size(); ++i) { - const auto & column = block.getByPosition(i); - res += column.name; + const auto & column = columns[i]; res += " ["; - for (size_t j = 0; j < column.column->size(); ++j) + for (size_t j = 0; j < column->size(); ++j) { if (j != 0) res += ", "; Field field; - column.column->get(j, field); + column->get(j, field); res += 
applyVisitor(FieldVisitorToString(), field); } res += "]\n"; @@ -84,23 +105,31 @@ void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_ LOG_DEBUG(log, "update Granule " << new_block.columns() << " pos: "<< *pos << " limit: " << limit << " rows: " << new_block.rows()); + size_t rows_read = std::min(limit, new_block.rows() - *pos); - size_t block_size = new_block.getByPosition(0).column->size(); - - if (!block.columns()) + if (index.max_rows && size() > index.max_rows) { - for (size_t i = 0; i < index.columns.size(); ++i) - { - const DataTypePtr & type = index.data_types[i]; - block.insert(ColumnWithTypeAndName(type->createColumn(), type, index.columns[i])); - } + *pos += rows_read; + return; } - for (size_t cur = 0; cur < limit && cur + *pos < block_size; ++cur) + Block key_block; + for (size_t i = 0; i < index.columns.size(); ++i) { - // TODO - ++(*pos); + const auto & name = index.columns[i]; + const auto & type = index.data_types[i]; + key_block.insert( + ColumnWithTypeAndName( + new_block.getByName(name).column->cut(*pos, rows_read), + type, + name)); } + + set->insertFromBlock(key_block); + + LOG_DEBUG(log, "unique rows: " << set->getTotalRowCount()); + + *pos += rows_read; }; UniqueCondition::UniqueCondition( @@ -111,7 +140,7 @@ UniqueCondition::UniqueCondition( bool UniqueCondition::alwaysUnknownOrTrue() const { - return true; + return false; } bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const @@ -164,6 +193,8 @@ std::unique_ptr MergeTreeUniqueIndexCreator( auto sample = ExpressionAnalyzer(expr_list, syntax, context) .getActions(true)->getSampleBlock(); + Block header; + Names columns; DataTypes data_types; @@ -175,11 +206,13 @@ std::unique_ptr MergeTreeUniqueIndexCreator( columns.emplace_back(column.name); data_types.emplace_back(column.type); + + header.insert(ColumnWithTypeAndName(column.type->createColumn(), column.type, column.name)); LOG_DEBUG(log, ">" << column.name << " " << 
column.type->getName()); } return std::make_unique( - node->name, std::move(unique_expr), columns, data_types, node->granularity.get(), max_rows);; + node->name, std::move(unique_expr), columns, data_types, header, node->granularity.get(), max_rows);; } } \ No newline at end of file diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 01d782cd39a..d6d0c680252 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -3,6 +3,8 @@ #include #include +#include + #include @@ -19,14 +21,15 @@ struct MergeTreeUniqueGranule : public MergeTreeIndexGranule void deserializeBinary(ReadBuffer & istr) override; String toString() const override; - bool empty() const override { return !block.rows(); } + size_t size() const { return set->getTotalRowCount(); } + bool empty() const override { return !size(); } void update(const Block & block, size_t * pos, size_t limit) override; ~MergeTreeUniqueGranule() override = default; const MergeTreeUniqueIndex & index; - Block block; + std::unique_ptr set; }; class UniqueCondition : public IndexCondition @@ -55,9 +58,10 @@ public: ExpressionActionsPtr expr, const Names & columns, const DataTypes & data_types, + const Block & header, size_t granularity, size_t _max_rows) - : MergeTreeIndex(name, expr, columns, data_types, granularity), max_rows(_max_rows) {} + : MergeTreeIndex(name, expr, columns, data_types, header, granularity), max_rows(_max_rows) {} ~MergeTreeUniqueIndex() override = default; From c701cb7e00ffa52751ca32967bc7a0f453c7b3e5 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 27 Jan 2019 21:23:08 +0300 Subject: [PATCH 04/20] added getBlock --- dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp | 4 ++-- dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h | 2 ++ dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp | 12 ++++++++++-- dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h | 2 ++ 
4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp index 1e0d874f1b7..0970af3fa19 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.cpp @@ -116,14 +116,14 @@ void MergeTreeMinMaxGranule::update(const Block & block, size_t * pos, size_t li LOG_DEBUG(log, "updated rows_read: " << rows_read); *pos += rows_read; -}; +} MinMaxCondition::MinMaxCondition( const SelectQueryInfo &query, const Context &context, const MergeTreeMinMaxIndex &index) - : IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {}; + : IndexCondition(), index(index), condition(query, context, index.columns, index.expr) {} bool MinMaxCondition::alwaysUnknownOrTrue() const { diff --git a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h index 6eb1bc5a762..7c6fb41a3c4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMinMaxIndex.h @@ -12,6 +12,7 @@ namespace DB class MergeTreeMinMaxIndex; + struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule { explicit MergeTreeMinMaxGranule(const MergeTreeMinMaxIndex & index); @@ -30,6 +31,7 @@ struct MergeTreeMinMaxGranule : public MergeTreeIndexGranule std::vector parallelogram; }; + class MinMaxCondition : public IndexCondition { public: diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index f5811bbd42f..1cb206142c3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -130,13 +130,21 @@ void MergeTreeUniqueGranule::update(const Block & new_block, size_t * pos, size_ LOG_DEBUG(log, "unique rows: " << set->getTotalRowCount()); *pos += rows_read; -}; +} + +Block 
MergeTreeUniqueGranule::getElementsBlock() const +{ + if (index.max_rows && size() > index.max_rows) + return index.header; + return index.header.cloneWithColumns(set->getSetElements()); +} + UniqueCondition::UniqueCondition( const SelectQueryInfo &, const Context &, const MergeTreeUniqueIndex &index) - : IndexCondition(), index(index) {}; + : IndexCondition(), index(index) {} bool UniqueCondition::alwaysUnknownOrTrue() const { diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index d6d0c680252..602850313e4 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -25,6 +25,7 @@ struct MergeTreeUniqueGranule : public MergeTreeIndexGranule bool empty() const override { return !size(); } void update(const Block & block, size_t * pos, size_t limit) override; + Block getElementsBlock() const; ~MergeTreeUniqueGranule() override = default; @@ -32,6 +33,7 @@ struct MergeTreeUniqueGranule : public MergeTreeIndexGranule std::unique_ptr set; }; + class UniqueCondition : public IndexCondition { public: From 902c9a9e5c1b4bf2bd8436b7b49e2f6369971c36 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Sun, 27 Jan 2019 23:24:33 +0300 Subject: [PATCH 05/20] unique_condition --- .../MergeTree/MergeTreeUniqueIndex.cpp | 103 +++++++++++++++++- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 8 ++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 1cb206142c3..6755abf4272 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -141,10 +142,41 @@ Block MergeTreeUniqueGranule::getElementsBlock() const UniqueCondition::UniqueCondition( - const SelectQueryInfo &, - const Context &, + const 
SelectQueryInfo & query, + const Context & context, const MergeTreeUniqueIndex &index) - : IndexCondition(), index(index) {} + : IndexCondition(), index(index) +{ + for (size_t i = 0, size = index.columns.size(); i < size; ++i) + { + std::string name = index.columns[i]; + if (!key_columns.count(name)) + key_columns[name] = i; + } + + const ASTSelectQuery & select = typeid_cast(*query.query); + + /// Replace logical functions with bit functions. + /// Working with UInt8: last bit -- can be true, previous -- can be false. + ASTPtr new_expression; + if (select.where_expression && select.prewhere_expression) + new_expression = makeASTFunction( + "and", + select.where_expression->clone(), + select.prewhere_expression->clone()); + else if (select.where_expression) + new_expression = select.where_expression->clone(); + else if (select.prewhere_expression) + new_expression = select.prewhere_expression->clone(); + else + /// 11_2 -- can be true and false at the same time + new_expression = std::make_shared(Field(3)); + + new_expression = makeASTFunction( + "bitAnd", + new_expression, + std::make_shared(Field(1))); +} bool UniqueCondition::alwaysUnknownOrTrue() const { @@ -161,6 +193,71 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c return true; } +void UniqueCondition::traverseAST(ASTPtr & node, const Context & context) +{ + if (ASTFunction * func = typeid_cast(&*node)) + { + if (operatorFromAST(func)) { + auto & args = typeid_cast(*func->arguments).children; + + for (size_t i = 0, size = args.size(); i < size; ++i) + traverseAST(args[i], context); + return; + } + } + + if (!atomFromAST(node, context)) + *node = ASTLiteral(Field(3)); /// Unknown +} + +bool termFromAST(const ASTPtr & node, const Context & context) +{ + /// function with args + return false; + termFromAST(node, context); +} + +bool UniqueCondition::atomFromAST(const ASTPtr & node, const Context & context) +{ + /// Functions < > = != <= >= in `notIn` + if (termFromAST(node, 
context)) + return true; + + if (const ASTFunction * func = typeid_cast(node.get())) + { + const ASTs & args = typeid_cast(*func->arguments).children; + + for (size_t i = 0, size = args.size(); i < size; ++i) + if (!termFromAST(args[i], context)) + return false; + + return true; + } + + return false; +} + +bool UniqueCondition::operatorFromAST(ASTFunction * func) +{ + /// Functions AND, OR, NOT. Replace with bit*. + const ASTs & args = typeid_cast(*func->arguments).children; + + if (func->name == "not") + { + if (args.size() != 1) + return false; + func->name = "bitNot"; + } + else if (func->name == "and" || func->name == "indexHint") + func->name = "bitAnd"; + else if (func->name == "or") + func->name = "bitOR"; + else + return false; + + return true; +} + MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const { diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 602850313e4..269d56f38cb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -48,7 +48,15 @@ public: ~UniqueCondition() override = default; private: + void traverseAST(ASTPtr & node, const Context & context); + bool termFromAST(const ASTPtr & node, const Context & context); + bool atomFromAST(const ASTPtr & node, const Context & context); + bool operatorFromAST(ASTFunction * func); + const MergeTreeUniqueIndex & index; + + std::map key_columns; + ExpressionActionsPtr actions; }; From aa37d950360ce09bd4b44bc6c33f96ba0f3736ad Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Jan 2019 00:08:14 +0300 Subject: [PATCH 06/20] added termForAST --- .../MergeTree/MergeTreeUniqueIndex.cpp | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 6755abf4272..f85ea2ef085 100644 --- 
a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -3,8 +3,10 @@ #include #include #include -#include + +#include #include +#include #include @@ -176,6 +178,8 @@ UniqueCondition::UniqueCondition( "bitAnd", new_expression, std::make_shared(Field(1))); + + traverseAST(new_expression, context); } bool UniqueCondition::alwaysUnknownOrTrue() const @@ -210,11 +214,30 @@ void UniqueCondition::traverseAST(ASTPtr & node, const Context & context) *node = ASTLiteral(Field(3)); /// Unknown } -bool termFromAST(const ASTPtr & node, const Context & context) +bool UniqueCondition::termFromAST(const ASTPtr & node, const Context & context) { - /// function with args + /// Function, literal or column + + if (const ASTLiteral * lit = typeid_cast(node.get())) + return true; + + if (const ASTIdentifier * identifier = typeid_cast(node.get())) + return key_columns.count(identifier->name) != 0; + + if (ASTFunction * func = typeid_cast(&*node)) { + if (key_columns.count(func->name) != 0) + return true; + + const ASTs & args = typeid_cast(*func->arguments).children; + + for (size_t i = 0, size = args.size(); i < size; ++i) + if (!termFromAST(args[i], context)) + return false; + + return true; + } + return false; - termFromAST(node, context); } bool UniqueCondition::atomFromAST(const ASTPtr & node, const Context & context) From 00ec4cc6ca6a1caa33afd6e669a357e678d1067a Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Jan 2019 10:05:24 +0300 Subject: [PATCH 07/20] unique --- .../MergeTree/MergeTreeUniqueIndex.cpp | 30 +++++++++---------- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 6 ++-- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index f85ea2ef085..94471e350f5 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ 
-145,7 +145,7 @@ Block MergeTreeUniqueGranule::getElementsBlock() const UniqueCondition::UniqueCondition( const SelectQueryInfo & query, - const Context & context, + const Context &, const MergeTreeUniqueIndex &index) : IndexCondition(), index(index) { @@ -179,7 +179,7 @@ UniqueCondition::UniqueCondition( new_expression, std::make_shared(Field(1))); - traverseAST(new_expression, context); + traverseAST(new_expression); } bool UniqueCondition::alwaysUnknownOrTrue() const @@ -197,7 +197,7 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c return true; } -void UniqueCondition::traverseAST(ASTPtr & node, const Context & context) +void UniqueCondition::traverseAST(ASTPtr & node) { if (ASTFunction * func = typeid_cast(&*node)) { @@ -205,33 +205,33 @@ void UniqueCondition::traverseAST(ASTPtr & node, const Context & context) auto & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - traverseAST(args[i], context); + traverseAST(args[i]); return; } } - if (!atomFromAST(node, context)) + if (!atomFromAST(node)) *node = ASTLiteral(Field(3)); /// Unknown } -bool UniqueCondition::termFromAST(const ASTPtr & node, const Context & context) +bool UniqueCondition::termFromAST(const ASTPtr & node) { /// Function, literal or column - if (const ASTLiteral * lit = typeid_cast(node.get())) + if (typeid_cast(node.get())) return true; if (const ASTIdentifier * identifier = typeid_cast(node.get())) - return key_columns.count(identifier->name) != 0; + return key_columns.count(identifier->getColumnName()) != 0; - if (ASTFunction * func = typeid_cast(&*node)) { - if (key_columns.count(func->name) != 0) + if (const ASTFunction * func = typeid_cast(node.get())) { + if (key_columns.count(func->getColumnName())) return true; const ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - if (!termFromAST(args[i], context)) + if (!termFromAST(args[i])) return false; 
return true; @@ -240,10 +240,10 @@ bool UniqueCondition::termFromAST(const ASTPtr & node, const Context & context) return false; } -bool UniqueCondition::atomFromAST(const ASTPtr & node, const Context & context) +bool UniqueCondition::atomFromAST(const ASTPtr & node) { /// Functions < > = != <= >= in `notIn` - if (termFromAST(node, context)) + if (termFromAST(node)) return true; if (const ASTFunction * func = typeid_cast(node.get())) @@ -251,7 +251,7 @@ bool UniqueCondition::atomFromAST(const ASTPtr & node, const Context & context) const ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - if (!termFromAST(args[i], context)) + if (!termFromAST(args[i])) return false; return true; @@ -269,7 +269,7 @@ bool UniqueCondition::operatorFromAST(ASTFunction * func) { if (args.size() != 1) return false; - func->name = "bitNot"; + func->name = "bitNot"; /// 3 - val } else if (func->name == "and" || func->name == "indexHint") func->name = "bitAnd"; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 269d56f38cb..d0599a962cf 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -48,9 +48,9 @@ public: ~UniqueCondition() override = default; private: - void traverseAST(ASTPtr & node, const Context & context); - bool termFromAST(const ASTPtr & node, const Context & context); - bool atomFromAST(const ASTPtr & node, const Context & context); + void traverseAST(ASTPtr & node); + bool atomFromAST(const ASTPtr & node); + bool termFromAST(const ASTPtr & node); bool operatorFromAST(ASTFunction * func); const MergeTreeUniqueIndex & index; From a74ed80f99907317198721631b6fea93e750c882 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Jan 2019 10:24:32 +0300 Subject: [PATCH 08/20] fixed not --- .../MergeTree/MergeTreeUniqueIndex.cpp | 41 ++++++++++++++----- 
.../Storages/MergeTree/MergeTreeUniqueIndex.h | 2 +- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 94471e350f5..9614036953d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -199,15 +199,13 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c void UniqueCondition::traverseAST(ASTPtr & node) { - if (ASTFunction * func = typeid_cast(&*node)) - { - if (operatorFromAST(func)) { - auto & args = typeid_cast(*func->arguments).children; + if (operatorFromAST(node)) { + ASTFunction * func = typeid_cast(&*node); + auto & args = typeid_cast(*func->arguments).children; - for (size_t i = 0, size = args.size(); i < size; ++i) - traverseAST(args[i]); - return; - } + for (size_t i = 0, size = args.size(); i < size; ++i) + traverseAST(args[i]); + return; } if (!atomFromAST(node)) @@ -260,16 +258,39 @@ bool UniqueCondition::atomFromAST(const ASTPtr & node) return false; } -bool UniqueCondition::operatorFromAST(ASTFunction * func) +bool UniqueCondition::operatorFromAST(ASTPtr & node) { /// Functions AND, OR, NOT. Replace with bit*. 
+ ASTFunction * func = typeid_cast(&*node); + if (!func) + return false; + const ASTs & args = typeid_cast(*func->arguments).children; if (func->name == "not") { if (args.size() != 1) return false; - func->name = "bitNot"; /// 3 - val + + auto one = std::make_shared(Field(1)); + auto two = std::make_shared(Field(2)); + + node = makeASTFunction( + "bitOr", + makeASTFunction( + "bitShiftLeft", + makeASTFunction( + "bitAnd", + node->clone(), + one->clone()), + one->clone()), + makeASTFunction( + "bitShiftRight", + makeASTFunction( + "bitAnd", + node->clone(), + two->clone()), + one->clone())); } else if (func->name == "and" || func->name == "indexHint") func->name = "bitAnd"; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index d0599a962cf..8e3434a9456 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -51,7 +51,7 @@ private: void traverseAST(ASTPtr & node); bool atomFromAST(const ASTPtr & node); bool termFromAST(const ASTPtr & node); - bool operatorFromAST(ASTFunction * func); + bool operatorFromAST(ASTPtr & node); const MergeTreeUniqueIndex & index; From b4d1bf869b7b5b2c87d4622a8c3073426d0f7907 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Jan 2019 11:15:13 +0300 Subject: [PATCH 09/20] uniqueCondition::mayBeTrueOnGranule --- .../MergeTree/MergeTreeUniqueIndex.cpp | 61 +++++++++++++++---- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 9 +-- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 9614036953d..251f7765bc2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -145,7 +145,7 @@ Block MergeTreeUniqueGranule::getElementsBlock() const UniqueCondition::UniqueCondition( const SelectQueryInfo & query, - const 
Context &, + const Context & context, const MergeTreeUniqueIndex &index) : IndexCondition(), index(index) { @@ -174,12 +174,30 @@ UniqueCondition::UniqueCondition( /// 11_2 -- can be true and false at the same time new_expression = std::make_shared(Field(3)); - new_expression = makeASTFunction( + expression_ast = makeASTFunction( "bitAnd", new_expression, std::make_shared(Field(1))); - traverseAST(new_expression); + traverseAST(expression_ast); + + + /// expression for alwaysUnknownOrTrue() checking + /*auto check_expression = expression_ast->clone(); + traverseAST(check_expression, replace_all = true); + + Block result + { + { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy" } + }; + + const auto check_expr = ExpressionAnalyzer(check_expression, query.syntax_analyzer_result, context).getActions(true); + check_expr->execute(result); + alwaysNotFalse = result.getByName(check_expression->getColumnName()).column->getBool(0); + if (!alwaysNotFalse) + return*/ + + actions = ExpressionAnalyzer(expression_ast, query.syntax_analyzer_result, context).getActions(true); } bool UniqueCondition::alwaysUnknownOrTrue() const @@ -194,31 +212,47 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c throw Exception( "Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); - return true; + if (granule->size() > index.max_rows) + return true; + + Block result = granule->getElementsBlock(); + actions->execute(result); + + + const auto & column = result.getByName(expression_ast->getColumnName()).column; + + for (size_t i = 0; i < column->size(); ++i) + if (column->getBool(i)) + return true; + + return false; } -void UniqueCondition::traverseAST(ASTPtr & node) +void UniqueCondition::traverseAST(ASTPtr & node, bool replace_all) const { if (operatorFromAST(node)) { ASTFunction * func = typeid_cast(&*node); auto & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) 
- traverseAST(args[i]); + traverseAST(args[i], replace_all); return; } - if (!atomFromAST(node)) + if (!atomFromAST(node, replace_all)) *node = ASTLiteral(Field(3)); /// Unknown } -bool UniqueCondition::termFromAST(const ASTPtr & node) +bool UniqueCondition::termFromAST(const ASTPtr & node, bool replace_all) const { /// Function, literal or column if (typeid_cast(node.get())) return true; + if (replace_all) + return false; + if (const ASTIdentifier * identifier = typeid_cast(node.get())) return key_columns.count(identifier->getColumnName()) != 0; @@ -238,18 +272,21 @@ bool UniqueCondition::termFromAST(const ASTPtr & node) return false; } -bool UniqueCondition::atomFromAST(const ASTPtr & node) +bool UniqueCondition::atomFromAST(const ASTPtr & node, bool replace_all) const { /// Functions < > = != <= >= in `notIn` - if (termFromAST(node)) + if (termFromAST(node, replace_all)) return true; + if (replace_all) + return false; + if (const ASTFunction * func = typeid_cast(node.get())) { const ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - if (!termFromAST(args[i])) + if (!termFromAST(args[i], replace_all)) return false; return true; @@ -258,7 +295,7 @@ bool UniqueCondition::atomFromAST(const ASTPtr & node) return false; } -bool UniqueCondition::operatorFromAST(ASTPtr & node) +bool UniqueCondition::operatorFromAST(ASTPtr & node) const { /// Functions AND, OR, NOT. Replace with bit*. 
ASTFunction * func = typeid_cast(&*node); diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 8e3434a9456..83afd0756dc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -48,14 +48,15 @@ public: ~UniqueCondition() override = default; private: - void traverseAST(ASTPtr & node); - bool atomFromAST(const ASTPtr & node); - bool termFromAST(const ASTPtr & node); - bool operatorFromAST(ASTPtr & node); + void traverseAST(ASTPtr & node, bool replace_all = false) const; + bool atomFromAST(const ASTPtr & node, bool replace_all = false) const; + bool termFromAST(const ASTPtr & node, bool replace_all = false) const; + bool operatorFromAST(ASTPtr & node) const; const MergeTreeUniqueIndex & index; std::map key_columns; + ASTPtr expression_ast; ExpressionActionsPtr actions; }; From 7c036e9b43b18824c94de40505b6a5a7f81bcedb Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Mon, 28 Jan 2019 13:35:19 +0300 Subject: [PATCH 10/20] fix --- dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 251f7765bc2..aec7fffa804 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -55,6 +55,7 @@ void MergeTreeUniqueGranule::deserializeBinary(ReadBuffer & istr) if (!set->empty()) { auto new_set = std::make_unique(SizeLimits{}, true); + new_set->setHeader(index.header); set.swap(new_set); } @@ -212,7 +213,7 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c throw Exception( "Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); - if (granule->size() > index.max_rows) + if (index.max_rows && granule->size() > index.max_rows) return true; Block result = 
granule->getElementsBlock(); From 149b0d84f9f4e74a2d3971226f0cfeab01e61b78 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 16:13:18 +0300 Subject: [PATCH 11/20] fixed bug with double column --- .../MergeTree/MergeTreeUniqueIndex.cpp | 42 ++++++++++--------- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 6 +-- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index aec7fffa804..d98af70dfbe 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -198,7 +198,12 @@ UniqueCondition::UniqueCondition( if (!alwaysNotFalse) return*/ - actions = ExpressionAnalyzer(expression_ast, query.syntax_analyzer_result, context).getActions(true); + auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze( + expression_ast, index.header.getNamesAndTypesList()); + actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); + + Poco::Logger * log = &Poco::Logger::get("unique_idx"); + LOG_DEBUG(log, "new unique index" << actions->dumpActions()); } bool UniqueCondition::alwaysUnknownOrTrue() const @@ -229,39 +234,41 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c return false; } -void UniqueCondition::traverseAST(ASTPtr & node, bool replace_all) const +void UniqueCondition::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) { ASTFunction * func = typeid_cast(&*node); auto & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - traverseAST(args[i], replace_all); + traverseAST(args[i]); return; } - if (!atomFromAST(node, replace_all)) - *node = ASTLiteral(Field(3)); /// Unknown + if (!atomFromAST(node)) + node = std::make_shared(Field(3)); /// can_be_true=1 can_be_false=0 } -bool UniqueCondition::termFromAST(const ASTPtr & node, bool 
replace_all) const +bool UniqueCondition::termFromAST(ASTPtr & node) const { /// Function, literal or column if (typeid_cast(node.get())) return true; - if (replace_all) - return false; - if (const ASTIdentifier * identifier = typeid_cast(node.get())) return key_columns.count(identifier->getColumnName()) != 0; - if (const ASTFunction * func = typeid_cast(node.get())) { + if (ASTFunction * func = typeid_cast(node.get())) + { if (key_columns.count(func->getColumnName())) + { + /// Function is already calculated. + node = std::make_shared(func->getColumnName()); return true; + } - const ASTs & args = typeid_cast(*func->arguments).children; + ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) if (!termFromAST(args[i])) @@ -273,21 +280,18 @@ bool UniqueCondition::termFromAST(const ASTPtr & node, bool replace_all) const return false; } -bool UniqueCondition::atomFromAST(const ASTPtr & node, bool replace_all) const +bool UniqueCondition::atomFromAST(ASTPtr & node) const { /// Functions < > = != <= >= in `notIn` - if (termFromAST(node, replace_all)) + if (termFromAST(node)) return true; - if (replace_all) - return false; - - if (const ASTFunction * func = typeid_cast(node.get())) + if (ASTFunction * func = typeid_cast(node.get())) { - const ASTs & args = typeid_cast(*func->arguments).children; + ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < size; ++i) - if (!termFromAST(args[i], replace_all)) + if (!termFromAST(args[i])) return false; return true; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 83afd0756dc..bc34d0f0bdb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -48,9 +48,9 @@ public: ~UniqueCondition() override = default; private: - void traverseAST(ASTPtr & node, bool replace_all = false) const; - bool 
atomFromAST(const ASTPtr & node, bool replace_all = false) const; - bool termFromAST(const ASTPtr & node, bool replace_all = false) const; + void traverseAST(ASTPtr & node) const; + bool atomFromAST(ASTPtr & node) const; + bool termFromAST(ASTPtr & node) const; bool operatorFromAST(ASTPtr & node) const; const MergeTreeUniqueIndex & index; From 67af98670e7e51930b52ab965f10823a27eaad76 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 19:53:44 +0300 Subject: [PATCH 12/20] is always true --- .../MergeTree/MergeTreeUniqueIndex.cpp | 103 +++++++++++------- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 4 +- 2 files changed, 65 insertions(+), 42 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index d98af70dfbe..1c6065b79d3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -10,6 +10,9 @@ #include +#include + + namespace DB { @@ -160,7 +163,7 @@ UniqueCondition::UniqueCondition( const ASTSelectQuery & select = typeid_cast(*query.query); /// Replace logical functions with bit functions. - /// Working with UInt8: last bit -- can be true, previous -- can be false. + /// Working with UInt8: last bit = can be true, previous = can be false. ASTPtr new_expression; if (select.where_expression && select.prewhere_expression) new_expression = makeASTFunction( @@ -175,6 +178,11 @@ UniqueCondition::UniqueCondition( /// 11_2 -- can be true and false at the same time new_expression = std::make_shared(Field(3)); + useless = checkASTAlwaysUnknownOrTrue(new_expression); + /// Do not proceed if index is useless for this query. 
+ if (useless) + return; + expression_ast = makeASTFunction( "bitAnd", new_expression, @@ -182,22 +190,6 @@ UniqueCondition::UniqueCondition( traverseAST(expression_ast); - - /// expression for alwaysUnknownOrTrue() checking - /*auto check_expression = expression_ast->clone(); - traverseAST(check_expression, replace_all = true); - - Block result - { - { DataTypeUInt8().createColumnConstWithDefaultValue(1), std::make_shared(), "_dummy" } - }; - - const auto check_expr = ExpressionAnalyzer(check_expression, query.syntax_analyzer_result, context).getActions(true); - check_expr->execute(result); - alwaysNotFalse = result.getByName(check_expression->getColumnName()).column->getBool(0); - if (!alwaysNotFalse) - return*/ - auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze( expression_ast, index.header.getNamesAndTypesList()); actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); @@ -208,7 +200,7 @@ UniqueCondition::UniqueCondition( bool UniqueCondition::alwaysUnknownOrTrue() const { - return false; + return useless; } bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const @@ -218,6 +210,9 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c throw Exception( "Unique index condition got wrong granule", ErrorCodes::LOGICAL_ERROR); + if (useless) + return true; + if (index.max_rows && granule->size() > index.max_rows) return true; @@ -249,8 +244,9 @@ void UniqueCondition::traverseAST(ASTPtr & node) const node = std::make_shared(Field(3)); /// can_be_true=1 can_be_false=0 } -bool UniqueCondition::termFromAST(ASTPtr & node) const +bool UniqueCondition::atomFromAST(ASTPtr & node) const { + /// Functions < > = != <= >= in `notIn` /// Function, literal or column if (typeid_cast(node.get())) @@ -271,27 +267,7 @@ bool UniqueCondition::termFromAST(ASTPtr & node) const ASTs & args = typeid_cast(*func->arguments).children; for (size_t i = 0, size = args.size(); i < 
size; ++i) - if (!termFromAST(args[i])) - return false; - - return true; - } - - return false; -} - -bool UniqueCondition::atomFromAST(ASTPtr & node) const -{ - /// Functions < > = != <= >= in `notIn` - if (termFromAST(node)) - return true; - - if (ASTFunction * func = typeid_cast(node.get())) - { - ASTs & args = typeid_cast(*func->arguments).children; - - for (size_t i = 0, size = args.size(); i < size; ++i) - if (!termFromAST(args[i])) + if (!atomFromAST(args[i])) return false; return true; @@ -337,13 +313,58 @@ bool UniqueCondition::operatorFromAST(ASTPtr & node) const else if (func->name == "and" || func->name == "indexHint") func->name = "bitAnd"; else if (func->name == "or") - func->name = "bitOR"; + func->name = "bitOr"; else return false; return true; } +bool checkAtomName(const String & name) +{ + static std::set atoms = { + "notEquals", + "equals", + "less", + "greater", + "lessOrEquals", + "greaterOrEquals", + "in", + "notIn", + "like" + }; + return atoms.find(name) != atoms.end(); +} + +bool UniqueCondition::checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic) const +{ + if (const auto * func = typeid_cast(node.get())) + { + if (key_columns.count(func->getColumnName())) + return false; + + const ASTs & args = typeid_cast(*func->arguments).children; + + if (func->name == "and" || func->name == "indexHint") + return checkASTAlwaysUnknownOrTrue(args[0], atomic) && checkASTAlwaysUnknownOrTrue(args[1], atomic); + else if (func->name == "or") + return checkASTAlwaysUnknownOrTrue(args[0], atomic) || checkASTAlwaysUnknownOrTrue(args[1], atomic); + else if (func->name == "not") + return checkASTAlwaysUnknownOrTrue(args[0], atomic); + else if (!atomic && checkAtomName(func->name)) + return checkASTAlwaysUnknownOrTrue(node, true); + else + return std::any_of(args.begin(), args.end(), + [this, &atomic](const auto & arg) { return checkASTAlwaysUnknownOrTrue(arg, atomic); }); + } + else if (const auto * literal = typeid_cast(node.get())) + return !atomic && 
literal->value.get(); + else if (const auto * identifier = typeid_cast(node.get())) + return key_columns.find(identifier->getColumnName()) == key_columns.end(); + else + return true; +} + MergeTreeIndexGranulePtr MergeTreeUniqueIndex::createIndexGranule() const { diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index bc34d0f0bdb..be5c1f11102 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -50,11 +50,13 @@ public: private: void traverseAST(ASTPtr & node) const; bool atomFromAST(ASTPtr & node) const; - bool termFromAST(ASTPtr & node) const; bool operatorFromAST(ASTPtr & node) const; + bool checkASTAlwaysUnknownOrTrue(const ASTPtr & node, bool atomic = false) const; + const MergeTreeUniqueIndex & index; + bool useless; std::map key_columns; ASTPtr expression_ast; ExpressionActionsPtr actions; From 68ea3f0ae0eb8dc79b7acf1101a9d22ebfd55f15 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 20:09:17 +0300 Subject: [PATCH 13/20] fix --- .../Storages/MergeTree/MergeTreeUniqueIndex.cpp | 17 ++++++++--------- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 1c6065b79d3..82891708227 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -18,7 +18,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; } @@ -232,11 +231,11 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c void UniqueCondition::traverseAST(ASTPtr & node) const { if (operatorFromAST(node)) { - ASTFunction * func = typeid_cast(&*node); + auto * func = typeid_cast(&*node); auto & args = 
typeid_cast(*func->arguments).children; - for (size_t i = 0, size = args.size(); i < size; ++i) - traverseAST(args[i]); + for (auto & arg : args) + traverseAST(arg); return; } @@ -252,10 +251,10 @@ bool UniqueCondition::atomFromAST(ASTPtr & node) const if (typeid_cast(node.get())) return true; - if (const ASTIdentifier * identifier = typeid_cast(node.get())) + if (const auto * identifier = typeid_cast(node.get())) return key_columns.count(identifier->getColumnName()) != 0; - if (ASTFunction * func = typeid_cast(node.get())) + if (auto * func = typeid_cast(node.get())) { if (key_columns.count(func->getColumnName())) { @@ -266,8 +265,8 @@ bool UniqueCondition::atomFromAST(ASTPtr & node) const ASTs & args = typeid_cast(*func->arguments).children; - for (size_t i = 0, size = args.size(); i < size; ++i) - if (!atomFromAST(args[i])) + for (auto & arg : args) + if (!atomFromAST(arg)) return false; return true; @@ -279,7 +278,7 @@ bool UniqueCondition::atomFromAST(ASTPtr & node) const bool UniqueCondition::operatorFromAST(ASTPtr & node) const { /// Functions AND, OR, NOT. Replace with bit*. 
- ASTFunction * func = typeid_cast(&*node); + auto * func = typeid_cast(&*node); if (!func) return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index be5c1f11102..8888362464b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -74,7 +74,7 @@ public: const Block & header, size_t granularity, size_t _max_rows) - : MergeTreeIndex(name, expr, columns, data_types, header, granularity), max_rows(_max_rows) {} + : MergeTreeIndex(std::move(name), std::move(expr), columns, data_types, header, granularity), max_rows(_max_rows) {} ~MergeTreeUniqueIndex() override = default; From 4ec0923fc9dce1914759fbb24b8a3bb02af1def9 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 20:26:45 +0300 Subject: [PATCH 14/20] key set --- .../MergeTree/MergeTreeUniqueIndex.cpp | 18 +++++++++++------- .../Storages/MergeTree/MergeTreeUniqueIndex.h | 3 ++- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 82891708227..f5aa79ade65 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -10,8 +10,6 @@ #include -#include - namespace DB { @@ -39,6 +37,13 @@ void MergeTreeUniqueGranule::serializeBinary(WriteBuffer & ostr) const const auto & columns = set->getSetElements(); const auto & size_type = DataTypePtr(std::make_shared()); + + if (index.max_rows && size() > index.max_rows) + { + size_type->serializeBinary(0, ostr); + return; + } + size_type->serializeBinary(size(), ostr); for (size_t i = 0; i < index.columns.size(); ++i) @@ -156,7 +161,7 @@ UniqueCondition::UniqueCondition( { std::string name = index.columns[i]; if (!key_columns.count(name)) - key_columns[name] = i; + key_columns.insert(name); } const ASTSelectQuery & select = 
typeid_cast(*query.query); @@ -240,12 +245,11 @@ void UniqueCondition::traverseAST(ASTPtr & node) const } if (!atomFromAST(node)) - node = std::make_shared(Field(3)); /// can_be_true=1 can_be_false=0 + node = std::make_shared(Field(3)); /// can_be_true=1 can_be_false=1 } bool UniqueCondition::atomFromAST(ASTPtr & node) const { - /// Functions < > = != <= >= in `notIn` /// Function, literal or column if (typeid_cast(node.get())) @@ -289,8 +293,8 @@ bool UniqueCondition::operatorFromAST(ASTPtr & node) const if (args.size() != 1) return false; - auto one = std::make_shared(Field(1)); - auto two = std::make_shared(Field(2)); + const auto one = std::make_shared(Field(1)); + const auto two = std::make_shared(Field(2)); node = makeASTFunction( "bitOr", diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 8888362464b..044632d658c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -57,7 +58,7 @@ private: const MergeTreeUniqueIndex & index; bool useless; - std::map key_columns; + std::set key_columns; ASTPtr expression_ast; ExpressionActionsPtr actions; }; From 9311c01ef778ed60fd5496a9bc6b5c533c67f874 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 20:28:13 +0300 Subject: [PATCH 15/20] spaces --- dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index f5aa79ade65..ac5e79e9906 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -430,4 +430,4 @@ std::unique_ptr MergeTreeUniqueIndexCreator( node->name, std::move(unique_expr), columns, data_types, 
header, node->granularity.get(), max_rows);; } -} \ No newline at end of file +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h index 044632d658c..c73fb2643a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.h @@ -90,4 +90,4 @@ public: std::unique_ptr MergeTreeUniqueIndexCreator( const MergeTreeData & data, std::shared_ptr node, const Context & context); -} \ No newline at end of file +} From 514987e9f975c2a2a3522e6dbab62848aa9d41c6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 20:46:16 +0300 Subject: [PATCH 16/20] test --- dbms/tests/queries/0_stateless/00832_unique_index.reference | 0 dbms/tests/queries/0_stateless/00832_unique_index.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00832_unique_index.reference create mode 100644 dbms/tests/queries/0_stateless/00832_unique_index.sql diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.reference b/dbms/tests/queries/0_stateless/00832_unique_index.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.sql b/dbms/tests/queries/0_stateless/00832_unique_index.sql new file mode 100644 index 00000000000..e69de29bb2d From d3b430d4fd6d972cadcfa7a6f362bdea90a11dd6 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 20:46:46 +0300 Subject: [PATCH 17/20] tests --- .../0_stateless/00825_minmax_index.sql | 9 ++-- .../0_stateless/00832_unique_index.reference | 8 ++++ .../0_stateless/00832_unique_index.sql | 41 +++++++++++++++++++ 3 files changed, 52 insertions(+), 6 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00825_minmax_index.sql b/dbms/tests/queries/0_stateless/00825_minmax_index.sql index fa233a15819..39271dc18e6 100644 --- a/dbms/tests/queries/0_stateless/00825_minmax_index.sql +++ 
b/dbms/tests/queries/0_stateless/00825_minmax_index.sql @@ -9,12 +9,9 @@ CREATE TABLE test.minmax_idx s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, - INDEX - idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4, - INDEX - idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, - INDEX - idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 + INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 4, + INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE minmax GRANULARITY 2, + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE minmax GRANULARITY 3 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 2; diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.reference b/dbms/tests/queries/0_stateless/00832_unique_index.reference index e69de29bb2d..046f6502f65 100644 --- a/dbms/tests/queries/0_stateless/00832_unique_index.reference +++ b/dbms/tests/queries/0_stateless/00832_unique_index.reference @@ -0,0 +1,8 @@ +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 +0 5 4.7 6.50 cba b 2014-01-04 +0 5 4.7 6.50 cba b 2014-03-11 +2 5 4.7 6.50 cba b 2014-06-11 +2 5 4.7 6.50 cba b 2015-01-01 diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.sql b/dbms/tests/queries/0_stateless/00832_unique_index.sql index e69de29bb2d..7bbeb074ed4 100644 --- a/dbms/tests/queries/0_stateless/00832_unique_index.sql +++ b/dbms/tests/queries/0_stateless/00832_unique_index.sql @@ -0,0 +1,41 @@ +DROP TABLE IF EXISTS test.minmax_idx; + +CREATE TABLE test.minmax_idx +( + u64 UInt64, + i32 Int32, + f64 Float64, + d Decimal(10, 2), + s String, + e Enum8('a' = 1, 'b' = 2, 'c' = 3), + dt Date, + INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 4, + INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 2, + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique() GRANULARITY 3 +) ENGINE = MergeTree() 
+ORDER BY u64 +SETTINGS index_granularity = 2; + + +/* many small inserts => table will make merges */ +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); + +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); + +/* simple select */ +SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; + +/* select with hole made by primary key */ +SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; + +DROP TABLE test.minmax_idx; \ No newline at end of file From c4dad05b87edfa892dc9b1c12bba27e157223df1 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 21:00:40 +0300 Subject: [PATCH 18/20] fix --- .../MergeTree/MergeTreeUniqueIndex.cpp | 3 -- .../0_stateless/00832_unique_index.sql | 36 +++++++++---------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index ac5e79e9906..19ffb259d39 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ 
-197,9 +197,6 @@ UniqueCondition::UniqueCondition( auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze( expression_ast, index.header.getNamesAndTypesList()); actions = ExpressionAnalyzer(expression_ast, syntax_analyzer_result, context).getActions(true); - - Poco::Logger * log = &Poco::Logger::get("unique_idx"); - LOG_DEBUG(log, "new unique index" << actions->dumpActions()); } bool UniqueCondition::alwaysUnknownOrTrue() const diff --git a/dbms/tests/queries/0_stateless/00832_unique_index.sql b/dbms/tests/queries/0_stateless/00832_unique_index.sql index 7bbeb074ed4..65568255c86 100644 --- a/dbms/tests/queries/0_stateless/00832_unique_index.sql +++ b/dbms/tests/queries/0_stateless/00832_unique_index.sql @@ -1,6 +1,6 @@ -DROP TABLE IF EXISTS test.minmax_idx; +DROP TABLE IF EXISTS test.unique_idx; -CREATE TABLE test.minmax_idx +CREATE TABLE test.unique_idx ( u64 UInt64, i32 Int32, @@ -11,31 +11,31 @@ CREATE TABLE test.minmax_idx dt Date, INDEX idx_all (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 4, INDEX idx_all2 (i32, i32 + f64, d, s, e, dt) TYPE unique GRANULARITY 2, - INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique() GRANULARITY 3 + INDEX idx_2 (u64 + toYear(dt), substring(s, 2, 4)) TYPE unique GRANULARITY 3 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 2; /* many small inserts => table will make merges */ -INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); -INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-01-04'); -INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); -INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); -INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); -INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-01-01'); +INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 
'cba', 'b', '2014-01-04'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2017-01-01'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2016-01-01'); +INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2015-01-01'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-11-11'); -INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); -INSERT INTO test.minmax_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); -INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); -INSERT INTO test.minmax_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); -INSERT INTO test.minmax_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); -INSERT INTO test.minmax_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-02-11'); +INSERT INTO test.unique_idx VALUES (0, 5, 4.7, 6.5, 'cba', 'b', '2014-03-11'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-04-11'); +INSERT INTO test.unique_idx VALUES (1, 2, 4.5, 2.5, 'abc', 'a', '2014-05-11'); +INSERT INTO test.unique_idx VALUES (2, 5, 4.7, 6.5, 'cba', 'b', '2014-06-11'); +INSERT INTO test.unique_idx VALUES (1, 5, 6.9, 1.57, 'bac', 'c', '2014-07-11'); /* simple select */ -SELECT * FROM test.minmax_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; +SELECT * FROM test.unique_idx WHERE i32 = 5 AND i32 + f64 < 12 AND 3 < d AND d < 7 AND (s = 'bac' OR s = 'cba') ORDER BY dt; /* select with hole made by primary key */ -SELECT * FROM test.minmax_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; +SELECT * FROM test.unique_idx WHERE u64 != 1 AND e = 'b' ORDER BY dt; -DROP TABLE test.minmax_idx; \ No newline at end of file +DROP TABLE test.unique_idx; \ No newline at end of file From 4de473a7b74dc26d13ed68e88793d358bf519d66 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 
21:22:12 +0300 Subject: [PATCH 19/20] unique --- docs/en/operations/table_engines/mergetree.md | 11 +++++++++-- docs/ru/operations/table_engines/mergetree.md | 15 ++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 52d34ea059b..473f2109e99 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -250,7 +250,7 @@ CREATE TABLE table_name s String, ... INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4 + INDEX b (u64 * length(s)) TYPE unique GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -263,10 +263,17 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Available Types of Indices -* `minmax` Stores extremes of specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like primary key. +* `minmax` +Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of the data like the primary key. + +* `unique(max_rows)` +Stores unique values of the specified expression (no more than `max_rows` rows), uses them to check whether the `WHERE` expression is not satisfiable on a block of the data. +If `max_rows=0`, then there are no limits for storing values. `unique` without parameters is equivalent to `unique(0)`.
```sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4 ``` diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 0f949319de1..7318e4d8145 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -241,7 +241,7 @@ CREATE TABLE table_name s String, ... INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE minmax GRANULARITY 4 + INDEX b (u64 * length(s), i32) TYPE unique GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -254,13 +254,22 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Доступные индексы -* `minmax` Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска кусков аналогично первичному ключу. +* `minmax` +Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу. -Пример +* `unique(max_rows)` +Хранит уникальные значения выражения на блоке в количестве не более `max_rows`, используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. +Если `max_rows=0`, то хранит значения выражения без ограничений. Если параметров не передано, то полагается `max_rows=0`. + + +Примеры ```sql INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique GRANULARITY 4 +INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE unique(100) GRANULARITY 4 ``` + ## Конкурентный доступ к данным Для конкурентного доступа к таблице используется мультиверсионность. 
То есть, при одновременном чтении и обновлении таблицы, данные будут читаться из набора кусочков, актуального на момент запроса. Длинных блокировок нет. Вставки никак не мешают чтениям. From 371e1658f88cb1a9065b64e25a887755edfe7783 Mon Sep 17 00:00:00 2001 From: Nikita Vasilev Date: Tue, 29 Jan 2019 21:40:10 +0300 Subject: [PATCH 20/20] fix --- dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp index 19ffb259d39..7d3c497e549 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeUniqueIndex.cpp @@ -232,7 +232,8 @@ bool UniqueCondition::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) c void UniqueCondition::traverseAST(ASTPtr & node) const { - if (operatorFromAST(node)) { + if (operatorFromAST(node)) + { auto * func = typeid_cast(&*node); auto & args = typeid_cast(*func->arguments).children;