From ef92ac4f3d7f1f9444c95369ba51f82f3aedd4b0 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 17:22:08 +0300 Subject: [PATCH 01/88] dbms: Server: Feature development. [#METR-17276] --- .../AggregateFunctionUniq.h | 43 ++++ .../DB/Common/CombinedCardinalityEstimator.h | 225 ++++++++++++++++++ dbms/include/DB/Common/HashTable/HashTable.h | 4 +- .../AggregateFunctionFactory.cpp | 21 ++ 4 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 dbms/include/DB/Common/CombinedCardinalityEstimator.h diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 9d063d8ea8b..146bb6a9394 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -117,6 +118,26 @@ struct AggregateFunctionUniqExactData }; +template +struct AggregateFunctionUniqCombinedData +{ + using Key = T; + using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + Set set; + + static String getName() { return "uniqCombined"; } +}; + +template <> +struct AggregateFunctionUniqCombinedData +{ + using Key = UInt64; + using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + Set set; + + static String getName() { return "uniqCombined"; } +}; + namespace detail { /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. @@ -166,6 +187,28 @@ namespace detail data.set.insert(key); } }; + + template + struct OneAdder > + { + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + if (data.set.isMedium()) + data.set.insert(static_cast &>(column).getData()[row_num]); + else + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } + }; + + template<> + struct OneAdder > + { + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } + }; } diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h new file mode 100644 index 00000000000..3feca851b09 --- /dev/null +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -0,0 +1,225 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +template +class CombinedCardinalityEstimator +{ +public: + using Self = CombinedCardinalityEstimator; + +private: + using Small = SmallSet; + using Medium = HashType; + using Large = HyperLogLogWithSmallSetOptimization; + enum class ContainerType { SMALL, MEDIUM, LARGE }; + +public: + ~CombinedCardinalityEstimator() + { + if (container_type == ContainerType::MEDIUM) + { + delete medium; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(medium)); + } + else if (container_type == ContainerType::LARGE) + { + delete large; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } + + void insert(Key value) + { + if (container_type == ContainerType::SMALL) + { + if (small.find(value) == small.end()) + { + if (!small.full()) + small.insert(value); + else + { + toMedium(); + medium->insert(value); + } + } + } + else if (container_type == ContainerType::MEDIUM) + { + if (medium->size() < medium_set_size) + medium->insert(value); + else + { + toLarge(); + large->insert(value); + } + } + else if (container_type == ContainerType::LARGE) + large->insert(value); + } + + UInt32 size() const + { + if (container_type == ContainerType::SMALL) + return small.size(); + else if (container_type == ContainerType::MEDIUM) + return medium->size(); + else if (container_type == ContainerType::LARGE) + return large->size(); + + return 0; + } + + void merge(const Self & rhs) + { + ContainerType res = max(container_type, rhs.container_type); + + if (container_type != res) + { + if (res == ContainerType::MEDIUM) + toMedium(); + else if (res == ContainerType::LARGE) + toLarge(); + } + + if (container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (container_type == ContainerType::MEDIUM) + { + if (rhs.container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (rhs.container_type == ContainerType::MEDIUM) + { + for (const auto & x : *rhs.medium) + insert(x); + } + } + else if (container_type == ContainerType::LARGE) + { + if (rhs.container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (rhs.container_type == ContainerType::MEDIUM) + { + for (const auto & x : *rhs.medium) + insert(x); + } + else if (rhs.container_type == ContainerType::LARGE) + large->merge(*rhs.large); + } + } + + void read(DB::ReadBuffer & in) + { + UInt8 v; + readBinary(v, in); + ContainerType t = static_cast(v); + + if (t == ContainerType::SMALL) + small.read(in); + else if (t == ContainerType::MEDIUM) + { + toMedium(); + medium->read(in); + } + else if (t == ContainerType::LARGE) + { + toLarge(); + large->read(in); + } + } + + void readAndMerge(DB::ReadBuffer & in) + { + Self other; + other.read(in); + merge(other); + } + + void write(DB::WriteBuffer & out) const + { + UInt8 v = static_cast(container_type); + writeBinary(v, out); + + if (container_type == ContainerType::SMALL) + small.write(out); + else if (container_type == ContainerType::MEDIUM) + medium->write(out); + else if (container_type == ContainerType::LARGE) + large->write(out); + } + + bool isMedium() const + { + return container_type == ContainerType::MEDIUM; + } + +private: + void toMedium() + { + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); + + Medium * tmp_medium = new Medium; + + for (const auto & x : small) + tmp_medium->insert(x); + + medium = tmp_medium; + + container_type = ContainerType::MEDIUM; + } + + void toLarge() + { + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + Large * tmp_large = new Large; + + for (const auto & x : *medium) + tmp_large->insert(x); + + large = tmp_large; + + delete medium; + medium = nullptr; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(medium)); + + container_type = ContainerType::LARGE; + } + + ContainerType max(const ContainerType & lhs, const ContainerType & rhs) + { + unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + return static_cast(res); + } + +private: + ContainerType container_type = ContainerType::SMALL; + const UInt32 medium_set_size = 1UL << medium_set_power; + Small small; + Medium * medium = nullptr; + Large * large = nullptr; +}; + +} diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 1fdee83c54b..67196746ae5 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -757,7 +757,7 @@ public: { Cell x; x.read(rb); - insert(x); + insert(Cell::getKey(x.getValue())); } } @@ -781,7 +781,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(x); + insert(Cell::getKey(x.getValue())); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 39464720135..4676d21bdda 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -351,6 +351,26 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da else throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + else if (name == "uniqCombined") + { + if (argument_types.size() != 1) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res = createWithNumericType(*argument_types[0]); + + if (res) + return res; + else if (typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else if (typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else if (typeid_cast(&argument_type) || typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else + throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } else if (name == "uniqUpTo") { if (argument_types.size() != 1) @@ -706,6 +726,7 @@ const AggregateFunctionFactory::FunctionNames & AggregateFunctionFactory::getFun "uniq", "uniqHLL12", "uniqExact", + "uniqCombined", "uniqUpTo", "groupArray", "groupUniqArray", From 242658c005abf53b9a0b265dfde4c0d74b8be86b Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 17:54:56 +0300 Subject: [PATCH 02/88] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 3feca851b09..5b47dcae1af 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -65,6 +65,8 @@ public: } else if (container_type == ContainerType::LARGE) large->insert(value); + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const @@ -75,8 +77,8 @@ public: return medium->size(); else if (container_type == ContainerType::LARGE) return large->size(); - - return 0; + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) @@ -124,6 +126,8 @@ public: else if (rhs.container_type == ContainerType::LARGE) large->merge(*rhs.large); } + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void read(DB::ReadBuffer & in) @@ -144,6 +148,8 @@ public: toLarge(); large->read(in); } + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) @@ -155,7 +161,7 @@ public: void write(DB::WriteBuffer & out) const { - UInt8 v = static_cast(container_type); + UInt8 v = static_cast(container_type); writeBinary(v, out); if (container_type == ContainerType::SMALL) @@ -164,6 +170,8 @@ public: medium->write(out); else if (container_type == ContainerType::LARGE) large->write(out); + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } bool isMedium() const From 92adfc9e0ea04c59f3d6d84ecc36ec7717e91edd Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 18:43:24 +0300 Subject: [PATCH 03/88] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 107 ++++++++++-------- 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 5b47dcae1af..0793f82e94d 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -4,32 +4,45 @@ #include #include + namespace DB { -template +namespace details +{ + +enum class ContainerType { SMALL, MEDIUM, LARGE }; + +ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +{ + unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + return static_cast(res); +} + +} + +template class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: - using Small = SmallSet; - using Medium = HashType; - using Large = HyperLogLogWithSmallSetOptimization; - enum class ContainerType { SMALL, MEDIUM, LARGE }; + using Small = SmallSet; + using Medium = HashContainer; + using Large = HyperLogLogWithSmallSetOptimization; public: ~CombinedCardinalityEstimator() { - if (container_type == ContainerType::MEDIUM) + if (container_type == details::ContainerType::MEDIUM) { delete medium; if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { delete large; @@ -40,7 +53,7 @@ public: void insert(Key value) { - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) { @@ -53,9 +66,9 @@ public: } } } - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size) + if (medium->size() < medium_set_size_max) medium->insert(value); else { @@ -63,7 +76,7 @@ public: large->insert(value); } } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) large->insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -71,11 +84,11 @@ public: UInt32 size() const { - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) return small.size(); - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) return medium->size(); - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) return large->size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -83,47 +96,47 @@ public: void merge(const Self & rhs) { - ContainerType res = max(container_type, rhs.container_type); + details::ContainerType max_container_type = details::max(container_type, rhs.container_type); - if (container_type != res) + if (container_type != max_container_type) { - if (res == ContainerType::MEDIUM) + if (max_container_type == details::ContainerType::MEDIUM) toMedium(); - else if (res == ContainerType::LARGE) + else if (max_container_type == details::ContainerType::LARGE) toLarge(); } - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { - if (rhs.container_type == ContainerType::SMALL) + if (rhs.container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == ContainerType::MEDIUM) + else if (rhs.container_type == details::ContainerType::MEDIUM) { for (const auto & x : *rhs.medium) insert(x); } } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { - if (rhs.container_type == ContainerType::SMALL) + if (rhs.container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == ContainerType::MEDIUM) + else if (rhs.container_type == details::ContainerType::MEDIUM) { for (const auto & x : *rhs.medium) insert(x); } - else if (rhs.container_type == ContainerType::LARGE) + else if (rhs.container_type == details::ContainerType::LARGE) large->merge(*rhs.large); } else @@ -134,16 +147,16 @@ public: { UInt8 v; readBinary(v, in); - ContainerType t = static_cast(v); + details::ContainerType t = static_cast(v); - if (t == ContainerType::SMALL) + if (t == details::ContainerType::SMALL) small.read(in); - else if (t == ContainerType::MEDIUM) + else if (t == details::ContainerType::MEDIUM) { toMedium(); medium->read(in); } - else if (t == ContainerType::LARGE) + else if (t == details::ContainerType::LARGE) { toLarge(); large->read(in); @@ -164,11 +177,11 @@ public: UInt8 v = static_cast(container_type); writeBinary(v, out); - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.write(out); - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) medium->write(out); - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) large->write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -176,12 +189,17 @@ public: bool isMedium() const { - return container_type == ContainerType::MEDIUM; + return container_type == details::ContainerType::MEDIUM; } private: void toMedium() { + if (container_type != details::ContainerType::SMALL) + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + + container_type = details::ContainerType::MEDIUM; + if (current_memory_tracker) current_memory_tracker->alloc(sizeof(medium)); @@ -191,12 +209,15 @@ private: tmp_medium->insert(x); medium = tmp_medium; - - container_type = ContainerType::MEDIUM; } void toLarge() { + if (container_type != details::ContainerType::MEDIUM) + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + + container_type = details::ContainerType::LARGE; + if (current_memory_tracker) current_memory_tracker->alloc(sizeof(large)); @@ -212,22 +233,14 @@ private: if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); - - container_type = ContainerType::LARGE; - } - - ContainerType max(const ContainerType & lhs, const ContainerType & rhs) - { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); - return static_cast(res); } private: - ContainerType container_type = ContainerType::SMALL; - const UInt32 medium_set_size = 1UL << medium_set_power; Small small; Medium * medium = nullptr; Large * large = nullptr; + const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; + details::ContainerType container_type = details::ContainerType::SMALL; }; } From 7ce91cc59a9cb88efb7d6e729ff7cda680fe5c69 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 19:16:56 +0300 Subject: [PATCH 04/88] dbms: Server: Feature development. [#METR-17276] --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 0793f82e94d..b0377858026 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -21,6 +21,10 @@ ContainerType max(const ContainerType & lhs, const ContainerType & rhs) } +/** Для маленького количества ключей - массив фиксированного размера "на стеке". + * Для среднего - выделяется HashSet. + * Для большого - выделяется HyperLogLog. + */ template class CombinedCardinalityEstimator { @@ -143,6 +147,7 @@ public: throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } + /// Можно вызывать только для пустого объекта. void read(DB::ReadBuffer & in) { UInt8 v; From f95d090a45c81030b0ee4d0eec74a58d4cce0e0f Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 20:07:51 +0300 Subject: [PATCH 05/88] dbms: Server: Feature development. [#METR-17276] --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index b0377858026..a3d15a8c155 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -218,7 +218,7 @@ private: void toLarge() { - if (container_type != details::ContainerType::MEDIUM) + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); container_type = details::ContainerType::LARGE; From 45d8bebff66e60f1af0ca5b843cb7ec4223ccfc1 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 22 Jul 2015 17:12:34 +0300 Subject: [PATCH 06/88] dbms: Server: Squeezed most of the fields into a 8-byte memory chunk. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 171 ++++++++++++------ 1 file changed, 113 insertions(+), 58 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 43c11380668..cef45658521 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,9 +12,9 @@ namespace DB namespace details { -enum class ContainerType { SMALL, MEDIUM, LARGE }; +enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; -ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); @@ -37,26 +38,20 @@ private: using Large = HyperLogLogWithSmallSetOptimization; public: + CombinedCardinalityEstimator() + { + setContainerType(details::ContainerType::SMALL); + } + ~CombinedCardinalityEstimator() { - if (container_type == details::ContainerType::MEDIUM) - { - delete medium; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(medium)); - } - else if (container_type == details::ContainerType::LARGE) - { - delete large; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(large)); - } + destroy(); } void insert(Key value) { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) @@ -66,41 +61,44 @@ public: else { toMedium(); - medium->insert(value); + getObject()->insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size_max) - medium->insert(value); + if (getObject()->size() < medium_set_size_max) + getObject()->insert(value); else { toLarge(); - large->insert(value); + getObject()->insert(value); } } else if (container_type == details::ContainerType::LARGE) - large->insert(value); + getObject()->insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return medium->size(); + return getObject()->size(); else if (container_type == details::ContainerType::LARGE) - return large->size(); + return getObject()->size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) { - details::ContainerType max_container_type = details::max(container_type, rhs.container_type); + auto container_type = getContainerType(); + auto max_container_type = details::max(container_type, rhs.getContainerType()); if (container_type != max_container_type) { @@ -117,31 +115,31 @@ public: } else if (container_type == details::ContainerType::MEDIUM) { - if (rhs.container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.medium) + for (const auto & x : *rhs.getObject()) insert(x); } } else if (container_type == details::ContainerType::LARGE) { - if (rhs.container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.medium) + for (const auto & x : *rhs.getObject()) insert(x); } - else if (rhs.container_type == details::ContainerType::LARGE) - large->merge(*rhs.large); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getObject()->merge(*rhs.getObject()); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -159,12 +157,12 @@ public: else if (t == details::ContainerType::MEDIUM) { toMedium(); - medium->read(in); + getObject()->read(in); } else if (t == details::ContainerType::LARGE) { toLarge(); - large->read(in); + getObject()->read(in); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -179,52 +177,57 @@ public: void write(DB::WriteBuffer & out) const { + auto container_type = getContainerType(); + UInt8 v = static_cast(container_type); writeBinary(v, out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - medium->write(out); + getObject()->write(out); else if (container_type == details::ContainerType::LARGE) - large->write(out); + getObject()->write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } bool isMedium() const { - return container_type == details::ContainerType::MEDIUM; + return getContainerType() == details::ContainerType::MEDIUM; } private: void toMedium() { - if (container_type != details::ContainerType::SMALL) + if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(medium)); - - Medium * tmp_medium = new Medium; + auto tmp_medium = std::make_unique(); for (const auto & x : small) tmp_medium->insert(x); - medium = tmp_medium; + new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - container_type = details::ContainerType::MEDIUM; + std::atomic_signal_fence(std::memory_order_seq_cst); + + setContainerType(details::ContainerType::MEDIUM); + + std::atomic_signal_fence(std::memory_order_seq_cst); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); } void toLarge() { + auto container_type = getContainerType(); + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(large)); - - Large * tmp_large = new Large; + auto tmp_large = std::make_unique(); if (container_type == details::ContainerType::SMALL) { @@ -233,30 +236,82 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *medium) + for (const auto & x : *getObject()) tmp_large->insert(x); + + destroy(); } - large = tmp_large; + new (&large) std::unique_ptr{ std::move(tmp_large) }; + + std::atomic_signal_fence(std::memory_order_seq_cst); + + setContainerType(details::ContainerType::LARGE); + + std::atomic_signal_fence(std::memory_order_seq_cst); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + } + + void NO_INLINE destroy() + { + auto container_type = getContainerType(); + + clearContainerType(); if (container_type == details::ContainerType::MEDIUM) { - delete medium; - medium = nullptr; - + medium.std::unique_ptr::~unique_ptr(); if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } + else if (container_type == details::ContainerType::LARGE) + { + large.std::unique_ptr::~unique_ptr(); + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } - container_type = details::ContainerType::LARGE; + template + T * getObject() + { + return reinterpret_cast(address & mask); + } + + template + const T * getObject() const + { + return reinterpret_cast(address & mask); + } + + void setContainerType(details::ContainerType t) + { + address |= static_cast(t); + } + + details::ContainerType getContainerType() const + { + return static_cast(address & ~mask); + } + + void clearContainerType() + { + address &= mask; } private: Small small; - Medium * medium = nullptr; - Large * large = nullptr; - const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; - details::ContainerType container_type = details::ContainerType::SMALL; + union + { + std::unique_ptr medium; + std::unique_ptr large; + UInt64 address = 0; + }; + static const UInt64 mask = 0xFFFFFFFC; + static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; } From 4bf6fe20706393dd89d80037b65a95ef9afe8612 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 22 Jul 2015 18:25:59 +0300 Subject: [PATCH 07/88] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 53 +++++++++---------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index cef45658521..014903c0db0 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -61,22 +61,23 @@ public: else { toMedium(); - getObject()->insert(value); + getContainer().insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (getObject()->size() < medium_set_size_max) - getObject()->insert(value); + auto & container = getContainer(); + if (container.size() < medium_set_size_max) + container.insert(value); else { toLarge(); - getObject()->insert(value); + getContainer().insert(value); } } else if (container_type == details::ContainerType::LARGE) - getObject()->insert(value); + getContainer().insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -88,9 +89,9 @@ public: if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return getObject()->size(); + return getContainer().size(); else if (container_type == details::ContainerType::LARGE) - return getObject()->size(); + return getContainer().size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -122,7 +123,7 @@ public: } else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.getObject()) + for (const auto & x : rhs.getContainer()) insert(x); } } @@ -135,11 +136,11 @@ public: } else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.getObject()) + for (const auto & x : rhs.getContainer()) insert(x); } else if (rhs.getContainerType() == details::ContainerType::LARGE) - getObject()->merge(*rhs.getObject()); + getContainer().merge(rhs.getContainer()); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -150,19 +151,19 @@ public: { UInt8 v; readBinary(v, in); - details::ContainerType t = static_cast(v); + auto container_type = static_cast(v); - if (t == details::ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.read(in); - else if (t == details::ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { toMedium(); - getObject()->read(in); + getContainer().read(in); } - else if (t == details::ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { toLarge(); - getObject()->read(in); + getContainer().read(in); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -178,16 +179,14 @@ public: void write(DB::WriteBuffer & out) const { auto container_type = getContainerType(); - - UInt8 v = static_cast(container_type); - writeBinary(v, out); + writeBinary(static_cast(container_type), out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - getObject()->write(out); + getContainer().write(out); else if (container_type == details::ContainerType::LARGE) - getObject()->write(out); + getContainer().write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -236,7 +235,7 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *getObject()) + for (const auto & x : getContainer()) tmp_large->insert(x); destroy(); @@ -276,15 +275,15 @@ private: } template - T * getObject() + T & getContainer() { - return reinterpret_cast(address & mask); + return *reinterpret_cast(address & mask); } template - const T * getObject() const + const T & getContainer() const { - return reinterpret_cast(address & mask); + return *reinterpret_cast(address & mask); } void setContainerType(details::ContainerType t) @@ -292,7 +291,7 @@ private: address |= static_cast(t); } - details::ContainerType getContainerType() const + inline details::ContainerType getContainerType() const { return static_cast(address & ~mask); } From b3bcc4f6688fa1f7f7eb8075b0d4efa1aaf9eef2 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 24 Jul 2015 19:46:00 +0300 Subject: [PATCH 08/88] dbms: Server: Various optimizations. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 91 +++++++++++-------- dbms/include/DB/Common/HashTable/HashTable.h | 46 ++++++++++ dbms/include/DB/Common/HashTable/SmallTable.h | 50 ++++++++++ .../HyperLogLogWithSmallSetOptimization.h | 18 +++- dbms/include/DB/Core/ErrorCodes.h | 1 + 5 files changed, 163 insertions(+), 43 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 014903c0db0..b7b3fb50552 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include @@ -16,7 +16,7 @@ enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + UInt8 res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); } @@ -26,16 +26,25 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType & * Для среднего - выделяется HashSet. * Для большого - выделяется HyperLogLog. */ -template +template +< + typename Key, + typename HashContainer, + UInt8 small_set_size_max, + UInt8 medium_set_power2_max, + UInt8 K, + typename Hash = IntHash32, + typename DenominatorType = float +> class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: using Small = SmallSet; using Medium = HashContainer; - using Large = HyperLogLogWithSmallSetOptimization; + using Large = HyperLogLogCounter; public: CombinedCardinalityEstimator() @@ -109,41 +118,18 @@ public: toLarge(); } - if (container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - if (rhs.getContainerType() == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::MEDIUM) - { - for (const auto & x : rhs.getContainer()) - insert(x); - } + for (const auto & x : rhs.getContainer()) + insert(x); } - else if (container_type == details::ContainerType::LARGE) - { - if (rhs.getContainerType() == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::MEDIUM) - { - for (const auto & x : rhs.getContainer()) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::LARGE) - getContainer().merge(rhs.getContainer()); - } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getContainer().merge(rhs.getContainer()); } /// Можно вызывать только для пустого объекта. @@ -171,9 +157,36 @@ public: void readAndMerge(DB::ReadBuffer & in) { - Self other; - other.read(in); - merge(other); + auto container_type = getContainerType(); + + UInt8 v; + readBinary(v, in); + auto rhs_container_type = static_cast(v); + + auto max_container_type = details::max(container_type, rhs_container_type); + + if (container_type != max_container_type) + { + if (max_container_type == details::ContainerType::MEDIUM) + toMedium(); + else if (max_container_type == details::ContainerType::LARGE) + toLarge(); + } + + if (rhs_container_type == details::ContainerType::SMALL) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::MEDIUM) + { + typename Medium::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::LARGE) + getContainer().readAndMerge(in); } void write(DB::WriteBuffer & out) const @@ -275,13 +288,13 @@ private: } template - T & getContainer() + inline T & getContainer() { return *reinterpret_cast(address & mask); } template - const T & getContainer() const + inline const T & getContainer() const { return *reinterpret_cast(address & mask); } diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 67196746ae5..2ce34905d69 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -251,6 +251,7 @@ class HashTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; template friend class TwoLevelHashTable; @@ -429,6 +430,51 @@ public: free(); } + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return Cell::getKey(cell.getValue()); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; class iterator { diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 10ec8479b93..521fe117845 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -27,6 +27,7 @@ class SmallTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; typedef SmallTable Self; typedef Cell cell_type; @@ -66,6 +67,55 @@ public: typedef typename Cell::value_type value_type; + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + + if (size > capacity) + throw DB::Exception("Illegal size"); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return Cell::getKey(cell.getValue()); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; + class iterator { Self * container; diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 7932ddfb0e8..405f7c5ca12 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -114,10 +114,20 @@ public: void readAndMerge(DB::ReadBuffer & in) { - /// Немного не оптимально. - HyperLogLogWithSmallSetOptimization other; - other.read(in); - merge(other); + bool is_rhs_large; + readBinary(is_rhs_large, in); + + if (!isLarge() && is_rhs_large) + toLarge(); + + if (!is_rhs_large) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else + large->readAndMerge(in); } void write(DB::WriteBuffer & out) const diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 937b06d5ce0..8fae35ea601 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -283,6 +283,7 @@ namespace ErrorCodes INDEX_NOT_USED = 277, LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, + NO_AVAILABLE_DATA = 280, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, From 04c78e57ef6bc119b84485183d337b0d3eda3bb8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jul 2015 22:23:22 +0300 Subject: [PATCH 09/88] dbms: function extractURLParameter: fixed error and removed support for ; [#METR-17461]. --- dbms/include/DB/Functions/FunctionsURL.h | 41 +++++++++++++----------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsURL.h b/dbms/include/DB/Functions/FunctionsURL.h index 437849fba30..6a588834786 100644 --- a/dbms/include/DB/Functions/FunctionsURL.h +++ b/dbms/include/DB/Functions/FunctionsURL.h @@ -404,31 +404,36 @@ struct ExtractURLParameterImpl { size_t cur_offset = offsets[i]; + const char * str = reinterpret_cast(&data[prev_offset]); + const char * pos = nullptr; - - do + const char * begin = strchr(str, '?'); + if (begin != nullptr) { - const char * str = reinterpret_cast(&data[prev_offset]); - - const char * begin = strchr(str, '?'); - if (begin == nullptr) - break; - - pos = strstr(begin + 1, param_str); - if (pos == nullptr) - break; - if (pos != begin + 1 && *(pos - 1) != ';' && *(pos - 1) != '&') + pos = begin + 1; + while (true) { - pos = nullptr; - break; - } + pos = strstr(pos, param_str); - pos += param_len; - } while (false); + if (pos == nullptr) + break; + + if (pos[-1] != '?' && pos[-1] != '&') + { + pos += param_len; + continue; + } + else + { + pos += param_len; + break; + } + } + } if (pos != nullptr) { - const char * end = strpbrk(pos, "&;#"); + const char * end = strpbrk(pos, "&#"); if (end == nullptr) end = pos + strlen(pos); From 0c9c9fcead92161f94bb62192e72d3da4f726aec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Jul 2015 22:24:56 +0300 Subject: [PATCH 10/88] dbms: added test [#METR-17461]. --- .../queries/0_stateless/00204_extract_url_parameter.reference | 1 + dbms/tests/queries/0_stateless/00204_extract_url_parameter.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00204_extract_url_parameter.reference create mode 100644 dbms/tests/queries/0_stateless/00204_extract_url_parameter.sql diff --git a/dbms/tests/queries/0_stateless/00204_extract_url_parameter.reference b/dbms/tests/queries/0_stateless/00204_extract_url_parameter.reference new file mode 100644 index 00000000000..58c9bdf9d01 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00204_extract_url_parameter.reference @@ -0,0 +1 @@ +111 diff --git a/dbms/tests/queries/0_stateless/00204_extract_url_parameter.sql b/dbms/tests/queries/0_stateless/00204_extract_url_parameter.sql new file mode 100644 index 00000000000..d6ca5b31333 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00204_extract_url_parameter.sql @@ -0,0 +1 @@ +SELECT extractURLParameter('http://test.com/?testq=aaa&q=111', 'q'); From 007ecde455b801a4d151c895134ac40bdd792422 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 08:55:40 +0300 Subject: [PATCH 11/88] dbms: Client: allowed to paste multi-line queries even in single-line mode [#METR-2944]. --- dbms/src/Client/Client.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index a92ab6c4935..a7e064cb6e6 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -369,6 +369,19 @@ private: } + /** Проверка для случая, когда в терминал вставляется многострочный запрос из буфера обмена. + * Позволяет не начинать выполнение одной строчки запроса, пока весь запрос не будет вставлен. + */ + static bool hasDataInSTDIN() + { + timeval timeout = { 0, 0 }; + fd_set fds; + FD_ZERO(&fds); + FD_SET(STDIN_FILENO, &fds); + return select(1, &fds, 0, 0, &timeout) == 1; + } + + void loop() { String query; @@ -395,7 +408,7 @@ private: query += line; - if (!ends_with_backslash && (ends_with_semicolon || has_vertical_output_suffix || !config().has("multiline"))) + if (!ends_with_backslash && (ends_with_semicolon || has_vertical_output_suffix || (!config().has("multiline") && !hasDataInSTDIN()))) { if (query != prev_query) { From 39bc10041da19cb19f61fb059a120f757ab1c8c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 12:49:09 +0300 Subject: [PATCH 12/88] dbms: generating query_id [#METR-17469]. --- dbms/src/Interpreters/Context.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index ac992e66349..22024e0b851 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -96,6 +97,8 @@ struct ContextShared /// Создаются при создании Distributed таблиц, так как нужно дождаться пока будут выставлены Settings Poco::SharedPtr clusters; + Poco::UUIDGenerator uuid_generator; + bool shutdown_called = false; @@ -587,8 +590,12 @@ void Context::setCurrentDatabase(const String & name) void Context::setCurrentQueryId(const String & query_id) { + String query_id_to_set = query_id; + if (query_id_to_set.empty()) /// Если пользователь не передал свой query_id, то генерируем его самостоятельно. + query_id_to_set = shared->uuid_generator.createRandom().toString(); + Poco::ScopedLock lock(shared->mutex); - current_query_id = query_id; + current_query_id = query_id_to_set; } From e3449cae836a69fbb2a426434ab059222792cc39 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 12:56:56 +0300 Subject: [PATCH 13/88] dbms: added progress for query CREATE AS SELECT [#METR-17470]. --- .../DB/Interpreters/InterpreterCreateQuery.h | 5 ++--- .../Interpreters/InterpreterCreateQuery.cpp | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h index 7382b797147..367253edcb2 100644 --- a/dbms/include/DB/Interpreters/InterpreterCreateQuery.h +++ b/dbms/include/DB/Interpreters/InterpreterCreateQuery.h @@ -24,8 +24,7 @@ public: */ BlockIO execute() override { - executeImpl(false); - return {}; + return executeImpl(false); } /** assume_metadata_exists - не проверять наличие файла с метаданными и не создавать его @@ -45,7 +44,7 @@ public: const ColumnDefaults & column_defaults); private: - void executeImpl(bool assume_metadata_exists); + BlockIO executeImpl(bool assume_metadata_exists); /// AST в список столбцов с типами. Столбцы типа Nested развернуты в список настоящих столбцов. using ColumnsAndDefaults = std::pair; diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 662eda59e69..969dd1c6562 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include @@ -42,7 +42,7 @@ InterpreterCreateQuery::InterpreterCreateQuery(ASTPtr query_ptr_, Context & cont } -void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) +BlockIO InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) { String path = context.getPath(); String current_database = context.getCurrentDatabase(); @@ -81,7 +81,7 @@ void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) if (!create.if_not_exists || !context.isDatabaseExist(database_name)) context.addDatabase(database_name); - return; + return {}; } SharedPtr interpreter_select; @@ -118,7 +118,7 @@ void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) if (context.isTableExist(database_name, table_name)) { if (create.if_not_exists) - return; + return {}; else throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } @@ -251,9 +251,15 @@ void InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) /// Если запрос CREATE SELECT, то вставим в таблицу данные if (create.select && storage_name != "View" && (storage_name != "MaterializedView" || create.is_populate)) { - BlockInputStreamPtr from = new MaterializingBlockInputStream(interpreter_select->execute().in); - copyData(*from, *res->write(query_ptr)); + BlockIO io; + io.in = new NullAndDoCopyBlockInputStream( + new MaterializingBlockInputStream(interpreter_select->execute().in), + res->write(query_ptr)); + + return io; } + + return {}; } InterpreterCreateQuery::ColumnsAndDefaults InterpreterCreateQuery::parseColumns(ASTPtr expression_list) From 6acbf3db3e5205b01497594d950bfd0bff084411 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 13:38:52 +0300 Subject: [PATCH 14/88] dbms: fixed progress bar for INSERT SELECT and CREATE AS SELECT queries [#METR-17470]. --- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 3 --- dbms/src/Interpreters/executeQuery.cpp | 9 +++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 6c0d79c6474..a6c66a015d3 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -330,9 +330,6 @@ BlockIO InterpreterSelectQuery::execute() /// Ограничения на результат, квота на результат, а также колбек для прогресса. if (IProfilingBlockInputStream * stream = dynamic_cast(&*streams[0])) { - stream->setProgressCallback(context.getProgressCallback()); - stream->setProcessListElement(context.getProcessListElement()); - /// Ограничения действуют только на конечный результат. if (to_stage == QueryProcessingStage::Complete) { diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 6912b9f4945..126b175c6bd 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -166,6 +166,15 @@ static std::tuple executeQueryImpl( /// Держим элемент списка процессов до конца обработки запроса. res.process_list_entry = process_list_entry; + if (res.in) + { + if (IProfilingBlockInputStream * stream = dynamic_cast(res.in.get())) + { + stream->setProgressCallback(context.getProgressCallback()); + stream->setProcessListElement(context.getProcessListElement()); + } + } + quota.addQuery(current_time); /// Всё, что связано с логом запросов. From d9e76da292ee2fa6a7d87c3bd0e035b69812d0f6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 13:58:41 +0300 Subject: [PATCH 15/88] dbms: added support for multiquery in interactive mode [#METR-17471]. --- dbms/src/Client/Client.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index a7e064cb6e6..f910e3b88a3 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -477,6 +477,12 @@ private: copyData(in, out); } + process(line); + } + + + bool process(const String & line) + { if (config().has("multiquery")) { /// Несколько запросов, разделенных ';'. @@ -507,17 +513,20 @@ private: while (isWhitespace(*begin) || *begin == ';') ++begin; - process(query, ast); + if (!processSingleQuery(query, ast)) + return false; } + + return true; } else { - process(line); + return processSingleQuery(line); } } - bool process(const String & line, ASTPtr parsed_query_ = nullptr) + bool processSingleQuery(const String & line, ASTPtr parsed_query_ = nullptr) { if (exit_strings.end() != exit_strings.find(line)) return false; From 80ce5f1f65826c65d09e569869936a81910ce655 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 14:47:50 +0300 Subject: [PATCH 16/88] dbms: scalar subqueries: initial implementation [#METR-17472]. --- .../DB/Interpreters/ExpressionAnalyzer.h | 4 + dbms/src/Interpreters/ExpressionAnalyzer.cpp | 91 ++++++++++++++++++- 2 files changed, 94 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h index 086cacdae18..36d7ea25e5e 100644 --- a/dbms/include/DB/Interpreters/ExpressionAnalyzer.h +++ b/dbms/include/DB/Interpreters/ExpressionAnalyzer.h @@ -226,6 +226,10 @@ private: /// Превратить перечисление значений или подзапрос в ASTSet. node - функция in или notIn. void makeSet(ASTFunction * node, const Block & sample_block); + /// Замена скалярных подзапросов на значения-константы. + void executeScalarSubqueries(); + void executeScalarSubqueriesImpl(ASTPtr & ast); + /// Находит глобальные подзапросы в секциях GLOBAL IN/JOIN. Заполняет external_tables. void initGlobalSubqueriesAndExternalTables(); void initGlobalSubqueries(ASTPtr & ast); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 11787edd54a..6b31f1d45dd 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -95,6 +95,7 @@ void ExpressionAnalyzer::init() LogicalExpressionsOptimizer logical_expressions_optimizer(select_query, settings); logical_expressions_optimizer.optimizeDisjunctiveEqualityChains(); + /// Добавляет в множество известных алиасов те, которые объявлены в структуре таблицы (ALIAS-столбцы). addStorageAliases(); /// Создаёт словарь aliases: alias -> ASTPtr @@ -103,6 +104,9 @@ void ExpressionAnalyzer::init() /// Common subexpression elimination. Rewrite rules. normalizeTree(); + /// Выполнение скалярных подзапросов - замена их на значения-константы. + executeScalarSubqueries(); + /// GROUP BY injective function elimination. optimizeGroupBy(); @@ -528,6 +532,90 @@ void ExpressionAnalyzer::normalizeTreeImpl( finished_asts[initial_ast] = ast; } + +void ExpressionAnalyzer::executeScalarSubqueries() +{ + executeScalarSubqueriesImpl(ast); +} + +void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) +{ + /** Заменяем подзапросы, возвращающие ровно одну строку + * ("скалярные" подзапросы) на соответствующие константы. + * + * Если подзапрос возвращает более одного столбца, то он заменяется на кортеж констант. + * + * Особенности: + * + * Замена происходит во время анализа запроса, а не во время основной стадии выполнения. + * Это значит, что не будет работать индикатор прогресса во время выполнения этих запросов, + * а также такие запросы нельзя будет прервать. + * + * Зато результат запросов может быть использован для индекса в таблице. + * + * Скалярные подзапросы выполняются на сервере-инициаторе запроса. + * На удалённые серверы запрос отправляется с уже подставленными константами. + * + * Замечения: + * Нет возможности указать алиас для подзапроса. + */ + + if (ASTSubquery * subquery = typeid_cast(ast.get())) + { + Context subquery_context = context; + Settings subquery_settings = context.getSettings(); + subquery_settings.limits.max_result_rows = 1; + subquery_settings.extremes = 0; + subquery_context.setSettings(subquery_settings); + + ASTPtr query = subquery->children.at(0); + BlockIO res = InterpreterSelectQuery(query, subquery_context, QueryProcessingStage::Complete, subquery_depth + 1).execute(); + + Block block; + try + { + block = res.in->read(); + + if (!block) + throw Exception("Scalar subquery returned empty result", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + + if (block.rows() != 1 || res.in->read()) + throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::TOO_MUCH_ROWS) + throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + else + throw; + } + + size_t columns = block.columns(); + if (columns == 1) + { + ast = new ASTLiteral(ast->range, (*block.getByPosition(0).column)[0]); + } + else + { + ASTFunction * tuple = new ASTFunction(ast->range); + ast = tuple; + tuple->kind = ASTFunction::FUNCTION; + tuple->name = "tuple"; + ASTExpressionList * exp_list = new ASTExpressionList(ast->range); + tuple->arguments = exp_list; + tuple->children.push_back(exp_list); + + exp_list->children.resize(columns); + for (size_t i = 0; i < columns; ++i) + exp_list->children[i] = new ASTLiteral(ast->range, (*block.getByPosition(i).column)[0]); + } + } + else + for (auto & child : ast->children) + executeScalarSubqueriesImpl(child); +} + + void ExpressionAnalyzer::optimizeGroupBy() { if (!(select_query && select_query->group_expression_list)) @@ -693,7 +781,8 @@ static SharedPtr interpretSubquery( * Так как результат этого поздапроса - ещё не результат всего запроса. * Вместо этого работают ограничения * max_rows_in_set, max_bytes_in_set, set_overflow_mode, - * max_rows_in_join, max_bytes_in_join, join_overflow_mode. + * max_rows_in_join, max_bytes_in_join, join_overflow_mode, + * которые проверяются отдельно (в объектах Set, Join). */ Context subquery_context = context; Settings subquery_settings = context.getSettings(); From 35b55b7e4f8733998144177080b651862e0ccf89 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 25 Jul 2015 14:59:21 +0300 Subject: [PATCH 17/88] dbms: added tests; fixed error in prev. revision [#METR-17472]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- .../tests/queries/0_stateless/00205_scalar_subqueries.reference | 1 + dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference create mode 100644 dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 6b31f1d45dd..2bf615d5dc1 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -603,7 +603,7 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) tuple->name = "tuple"; ASTExpressionList * exp_list = new ASTExpressionList(ast->range); tuple->arguments = exp_list; - tuple->children.push_back(exp_list); + tuple->children.push_back(tuple->arguments); exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference @@ -0,0 +1 @@ +1 1 diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql new file mode 100644 index 00000000000..d89c4769261 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -0,0 +1 @@ +SELECT (SELECT (SELECT (SELECT (SELECT (SELECT count() FROM (SELECT * FROM system.numbers LIMIT 10)))))) = (SELECT 10), ((SELECT 1, 'Hello', [1, 2]).3)[1]; From b87857152b7aa9a27d409e4c49f01a77bc715ba0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 10:08:46 +0300 Subject: [PATCH 18/88] dbms: fixed error in prev. revision [#METR-17472]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 49 ++++++++++++++++--- .../00205_scalar_subqueries.reference | 1 + .../0_stateless/00205_scalar_subqueries.sql | 1 + 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 2bf615d5dc1..716d45a3846 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -87,6 +87,18 @@ const std::unordered_set possibly_injective_function_names "dictGetDateTime" }; +static bool functionIsInOperator(const String & name) +{ + return name == "in" || name == "notIn"; +} + +static bool functionIsInOrGlobalInOperator(const String & name) +{ + return name == "in" || name == "notIn" || name == "globalIn" || name == "globalNotIn"; +} + + + void ExpressionAnalyzer::init() { select_query = typeid_cast(&*ast); @@ -392,7 +404,7 @@ void ExpressionAnalyzer::normalizeTreeImpl( } /// Может быть указано IN t, где t - таблица, что равносильно IN (SELECT * FROM t). - if (func_node->name == "in" || func_node->name == "notIn" || func_node->name == "globalIn" || func_node->name == "globalNotIn") + if (functionIsInOrGlobalInOperator(func_node->name)) if (ASTIdentifier * right = typeid_cast(&*func_node->arguments->children.at(1))) right->kind = ASTIdentifier::Table; @@ -535,7 +547,12 @@ void ExpressionAnalyzer::normalizeTreeImpl( void ExpressionAnalyzer::executeScalarSubqueries() { - executeScalarSubqueriesImpl(ast); + for (auto & child : ast->children) + { + /// Не опускаемся в FROM и JOIN. + if (!select_query || (child.get() != select_query->table.get() && child.get() != select_query->join.get())) + executeScalarSubqueriesImpl(child); + } } void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) @@ -611,8 +628,28 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) } } else - for (auto & child : ast->children) - executeScalarSubqueriesImpl(child); + { + /** Не опускаемся в подзапросы в аргументах IN. + * Но если аргумент - не подзапрос, то глубже внутри него могут быть подзапросы, и в них надо опускаться. + */ + ASTFunction * func = typeid_cast(ast.get()); + if (func && func->kind == ASTFunction::FUNCTION + && functionIsInOrGlobalInOperator(func->name)) + { + for (auto & child : ast->children) + { + if (child.get() != func->arguments) + executeScalarSubqueriesImpl(child); + else + for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i) + if (i != 1 || !typeid_cast(func->arguments->children[i].get())) + executeScalarSubqueriesImpl(func->arguments->children[i]); + } + } + else + for (auto & child : ast->children) + executeScalarSubqueriesImpl(child); + } } @@ -745,7 +782,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(ASTPtr & node, const Block & sampl makeSetsForIndexImpl(child, sample_block); ASTFunction * func = typeid_cast(node.get()); - if (func && func->kind == ASTFunction::FUNCTION && (func->name == "in" || func->name == "notIn")) + if (func && func->kind == ASTFunction::FUNCTION && functionIsInOperator(func->name)) { IAST & args = *func->arguments; ASTPtr & arg = args.children.at(1); @@ -1310,7 +1347,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl if (node->kind == ASTFunction::FUNCTION) { - if (node->name == "in" || node->name == "notIn" || node->name == "globalIn" || node->name == "globalNotIn") + if (functionIsInOrGlobalInOperator(node->name)) { if (!no_subqueries) { diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference index 9972842f982..84910dae000 100644 --- a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference @@ -1 +1,2 @@ 1 1 +1 diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql index d89c4769261..71af1ad6a07 100644 --- a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -1 +1,2 @@ SELECT (SELECT (SELECT (SELECT (SELECT (SELECT count() FROM (SELECT * FROM system.numbers LIMIT 10)))))) = (SELECT 10), ((SELECT 1, 'Hello', [1, 2]).3)[1]; +SELECT toUInt64((SELECT 9)) IN (SELECT number FROM system.numbers LIMIT 10); From 8e44832c85ec719e85aa8f310939afc4a5a20f8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 10:55:48 +0300 Subject: [PATCH 19/88] dbms: scalar subqueries: development [#METR-17472]. --- dbms/include/DB/Parsers/ASTSubquery.h | 8 ++--- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 32 ++++++++++++++++++-- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index f94cbfe8346..8dac88b26d3 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -2,7 +2,7 @@ #include -#include +#include namespace DB @@ -11,12 +11,12 @@ namespace DB /** Подзарос SELECT */ -class ASTSubquery : public IAST +class ASTSubquery : public ASTWithAlias { public: ASTSubquery() = default; - ASTSubquery(const StringRange range_) : IAST(range_) {} - + ASTSubquery(const StringRange range_) : ASTWithAlias(range_) {} + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Subquery"; } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 716d45a3846..e0c2224ec7a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -555,6 +555,27 @@ void ExpressionAnalyzer::executeScalarSubqueries() } } + +static ASTPtr addTypeConversion(ASTLiteral * ast_, const String & type_name) +{ + if (0 == type_name.compare(0, strlen("Array"), "Array")) + return ast_; /// Преобразование типов для массивов пока не поддерживаем. + + auto ast = std::unique_ptr(ast_); + ASTFunction * func = new ASTFunction(ast->range); + ASTPtr res = func; + func->alias = ast->alias; + ast->alias.clear(); + func->kind = ASTFunction::FUNCTION; + func->name = "to" + type_name; + ASTExpressionList * exp_list = new ASTExpressionList(ast->range); + func->arguments = exp_list; + func->children.push_back(func->arguments); + exp_list->children.push_back(ast.release()); + return res; +} + + void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) { /** Заменяем подзапросы, возвращающие ровно одну строку @@ -610,11 +631,14 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) size_t columns = block.columns(); if (columns == 1) { - ast = new ASTLiteral(ast->range, (*block.getByPosition(0).column)[0]); + ASTLiteral * lit = new ASTLiteral(ast->range, (*block.getByPosition(0).column)[0]); + lit->alias = subquery->alias; + ast = addTypeConversion(lit, block.getByPosition(0).type->getName()); } else { ASTFunction * tuple = new ASTFunction(ast->range); + tuple->alias = subquery->alias; ast = tuple; tuple->kind = ASTFunction::FUNCTION; tuple->name = "tuple"; @@ -624,7 +648,11 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) exp_list->children.resize(columns); for (size_t i = 0; i < columns; ++i) - exp_list->children[i] = new ASTLiteral(ast->range, (*block.getByPosition(i).column)[0]); + { + exp_list->children[i] = addTypeConversion( + new ASTLiteral(ast->range, (*block.getByPosition(i).column)[0]), + block.getByPosition(i).type->getName()); + } } } else From 323188bd2914b5e593f337848e232d3db6d1b91f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 11:16:39 +0300 Subject: [PATCH 20/88] dbms: scalar subqueries: development [#METR-17472]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 -- dbms/src/Parsers/ExpressionElementParsers.cpp | 8 ++--- dbms/src/Parsers/formatAST.cpp | 36 ++++++++++++------- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e0c2224ec7a..b95eb965d41 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -593,9 +593,6 @@ void ExpressionAnalyzer::executeScalarSubqueriesImpl(ASTPtr & ast) * * Скалярные подзапросы выполняются на сервере-инициаторе запроса. * На удалённые серверы запрос отправляется с уже подставленными константами. - * - * Замечения: - * Нет возможности указать алиас для подзапроса. */ if (ASTSubquery * subquery = typeid_cast(ast.get())) diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 1209af32578..e58849423e9 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -556,12 +556,8 @@ bool ParserWithOptionalAlias::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & { String alias_name = typeid_cast(*alias_node).name; - if (ASTFunction * func = typeid_cast(&*node)) - func->alias = alias_name; - else if (ASTIdentifier * ident = typeid_cast(&*node)) - ident->alias = alias_name; - else if (ASTLiteral * lit = typeid_cast(&*node)) - lit->alias = alias_name; + if (ASTWithAlias * ast_with_alias = dynamic_cast(node.get())) + ast_with_alias->alias = alias_name; else { expected = "alias cannot be here"; diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 3c1a0e7b62d..68feeb8d518 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -63,12 +63,24 @@ String backQuoteIfNeed(const String & x) } -String hightlight(const String & keyword, const String & color_sequence, const bool hilite) +static String hightlight(const String & keyword, const String & color_sequence, const bool hilite) { return hilite ? color_sequence + keyword + hilite_none : keyword; } +static void writeAlias(const String & name, std::ostream & s, bool hilite, bool one_line) +{ + s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); + + WriteBufferFromOStream wb(s, 32); + writeProbablyBackQuotedString(name, wb); + wb.next(); + + s << (hilite ? hilite_none : ""); +} + + void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) @@ -245,12 +257,23 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo void formatAST(const ASTSubquery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (need_parens && !ast.alias.empty()) + s << '('; + std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); std::string nl_or_nothing = one_line ? "" : "\n"; s << nl_or_nothing << indent_str << "(" << nl_or_nothing; formatAST(*ast.children[0], s, indent + 1, hilite, one_line); s << nl_or_nothing << indent_str << ")"; + + if (!ast.alias.empty()) + { + writeAlias(ast.alias, s, hilite, one_line); + if (need_parens) + s << ')'; + } } void formatAST(const ASTCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) @@ -461,17 +484,6 @@ void formatAST(const ASTInsertQuery & ast, std::ostream & s, size_t indent, bo } } -static void writeAlias(const String & name, std::ostream & s, bool hilite, bool one_line) -{ - s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); - - WriteBufferFromOStream wb(s, 32); - writeProbablyBackQuotedString(name, wb); - wb.next(); - - s << (hilite ? hilite_none : ""); -} - void formatAST(const ASTFunction & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. From 6f2eda117e7f6679315c22ebd8ef54e0e0b72e1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 11:28:12 +0300 Subject: [PATCH 21/88] dbms: fixed error [#METR-17472]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index b95eb965d41..c5a99bf5eb0 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -547,11 +547,16 @@ void ExpressionAnalyzer::normalizeTreeImpl( void ExpressionAnalyzer::executeScalarSubqueries() { - for (auto & child : ast->children) + if (!select_query) + executeScalarSubqueriesImpl(ast); + else { - /// Не опускаемся в FROM и JOIN. - if (!select_query || (child.get() != select_query->table.get() && child.get() != select_query->join.get())) - executeScalarSubqueriesImpl(child); + for (auto & child : ast->children) + { + /// Не опускаемся в FROM и JOIN. + if (child.get() != select_query->table.get() && child.get() != select_query->join.get()) + executeScalarSubqueriesImpl(child); + } } } From 5c2b5ffb1084019d385002f74be95c8975bf16ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 13:40:32 +0300 Subject: [PATCH 22/88] dbms: added function emptyArrayToSingle [#METR-17474]. --- dbms/include/DB/Functions/FunctionsArray.h | 256 +++++++++++++++++- dbms/src/Functions/FunctionsArray.cpp | 1 + .../00205_scalar_subqueries.reference | 3 + .../0_stateless/00205_scalar_subqueries.sql | 3 + .../00206_empty_array_to_single.reference | 14 + .../00206_empty_array_to_single.sql | 8 + 6 files changed, 283 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00206_empty_array_to_single.reference create mode 100644 dbms/tests/queries/0_stateless/00206_empty_array_to_single.sql diff --git a/dbms/include/DB/Functions/FunctionsArray.h b/dbms/include/DB/Functions/FunctionsArray.h index 498af1955b6..6d1be3161e6 100644 --- a/dbms/include/DB/Functions/FunctionsArray.h +++ b/dbms/include/DB/Functions/FunctionsArray.h @@ -43,6 +43,8 @@ namespace DB * Например: arrayEnumerateUniq([10, 20, 10, 30]) = [1, 1, 2, 1] * arrayEnumerateUniq(arr1, arr2...) * - для кортежей из элементов на соответствующих позициях в нескольких массивах. + * + * emptyArrayToSingle(arr) - заменить пустые массивы на массивы из одного элемента со значением "по-умолчанию". */ @@ -1695,13 +1697,263 @@ private: }; +class FunctionEmptyArrayToSingle : public IFunction +{ +public: + static constexpr auto name = "emptyArrayToSingle"; + static IFunction * create(const Context & context) { return new FunctionEmptyArrayToSingle; } + + /// Получить имя функции. + String getName() const + { + return name; + } + + /// Получить типы результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение. + DataTypePtr getReturnType(const DataTypes & arguments) const + { + if (arguments.size() != 1) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 1.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeArray * array_type = typeid_cast(arguments[0].get()); + if (!array_type) + throw Exception("Argument for function " + getName() + " must be array.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return arguments[0]->clone(); + } + + /// Выполнить функцию над блоком. + void execute(Block & block, const ColumnNumbers & arguments, size_t result) + { + if (executeConst(block, arguments, result)) + return; + + const ColumnArray * array = typeid_cast(block.getByPosition(arguments[0]).column.get()); + if (!array) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + + ColumnPtr res_ptr = array->cloneEmpty(); + block.getByPosition(result).column = res_ptr; + ColumnArray & res = static_cast(*res_ptr); + + const IColumn & src_data = array->getData(); + const ColumnArray::Offsets_t & src_offsets = array->getOffsets(); + IColumn & res_data = res.getData(); + ColumnArray::Offsets_t & res_offsets = res.getOffsets(); + + if (!( executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeNumber (src_data, src_offsets, res_data, res_offsets) + || executeString (src_data, src_offsets, res_data, res_offsets) + || executeFixedString (src_data, src_offsets, res_data, res_offsets))) + throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + + " of first argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); + } + +private: + bool executeConst(Block & block, const ColumnNumbers & arguments, size_t result) + { + if (const ColumnConstArray * const_array = typeid_cast(block.getByPosition(arguments[0]).column.get())) + { + if (const_array->getData().empty()) + { + auto nested_type = typeid_cast(*block.getByPosition(arguments[0]).type).getNestedType(); + + block.getByPosition(result).column = new ColumnConstArray( + block.rowsInFirstColumn(), + {nested_type->getDefault()}, + nested_type->clone()); + } + else + block.getByPosition(result).column = block.getByPosition(arguments[0]).column; + + return true; + } + else + return false; + } + + template + bool executeNumber( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) + { + if (const ColumnVector * src_data_concrete = typeid_cast *>(&src_data)) + { + const PODArray & src_data = src_data_concrete->getData(); + PODArray & res_data = typeid_cast &>(res_data_col).getData(); + size_t size = src_offsets.size(); + res_offsets.resize(size); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + ColumnArray::Offset_t res_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_offsets[i] != src_prev_offset) + { + size_t size_to_write = src_offsets[i] - src_prev_offset; + size_t prev_res_data_size = res_data.size(); + res_data.resize(prev_res_data_size + size_to_write); + memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * sizeof(T)); + res_prev_offset += size_to_write; + res_offsets[i] = res_prev_offset; + } + else + { + res_data.push_back(T()); + ++res_prev_offset; + res_offsets[i] = res_prev_offset; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; + } + + bool executeFixedString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_offsets) + { + if (const ColumnFixedString * src_data_concrete = typeid_cast(&src_data)) + { + const size_t n = src_data_concrete->getN(); + const ColumnFixedString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnFixedString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + size_t size = src_offsets.size(); + res_offsets.resize(size); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_prev_offset = 0; + ColumnArray::Offset_t res_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_offsets[i] != src_prev_offset) + { + size_t size_to_write = src_offsets[i] - src_prev_offset; + size_t prev_res_data_size = res_data.size(); + res_data.resize(prev_res_data_size + size_to_write * n); + memcpy(&res_data[prev_res_data_size], &src_data[src_prev_offset], size_to_write * n); + res_prev_offset += size_to_write; + res_offsets[i] = res_prev_offset; + } + else + { + size_t prev_res_data_size = res_data.size(); + res_data.resize(prev_res_data_size + n); + memset(&res_data[prev_res_data_size], 0, n); + ++res_prev_offset; + res_offsets[i] = res_prev_offset; + } + + src_prev_offset = src_offsets[i]; + } + + return true; + } + else + return false; + } + + bool executeString( + const IColumn & src_data, const ColumnArray::Offsets_t & src_array_offsets, + IColumn & res_data_col, ColumnArray::Offsets_t & res_array_offsets) + { + if (const ColumnString * src_data_concrete = typeid_cast(&src_data)) + { + const ColumnString::Offsets_t & src_string_offsets = src_data_concrete->getOffsets(); + ColumnString::Offsets_t & res_string_offsets = typeid_cast(res_data_col).getOffsets(); + + const ColumnString::Chars_t & src_data = src_data_concrete->getChars(); + ColumnString::Chars_t & res_data = typeid_cast(res_data_col).getChars(); + + size_t size = src_array_offsets.size(); + res_array_offsets.resize(size); + res_string_offsets.reserve(src_string_offsets.size()); + res_data.reserve(src_data.size()); + + ColumnArray::Offset_t src_array_prev_offset = 0; + ColumnArray::Offset_t res_array_prev_offset = 0; + + ColumnString::Offset_t src_string_prev_offset = 0; + ColumnString::Offset_t res_string_prev_offset = 0; + + for (size_t i = 0; i < size; ++i) + { + if (src_array_offsets[i] != src_array_prev_offset) + { + size_t array_size = src_array_offsets[i] - src_array_prev_offset; + + size_t bytes_to_copy = 0; + size_t from_string_prev_offset_local = src_string_prev_offset; + for (size_t j = 0; j < array_size; ++j) + { + size_t string_size = src_string_offsets[src_array_prev_offset + j] - from_string_prev_offset_local; + + res_string_prev_offset += string_size; + res_string_offsets.push_back(res_string_prev_offset); + + from_string_prev_offset_local += string_size; + bytes_to_copy += string_size; + } + + size_t res_data_old_size = res_data.size(); + res_data.resize(res_data_old_size + bytes_to_copy); + memcpy(&res_data[res_data_old_size], &src_data[src_string_prev_offset], bytes_to_copy); + + res_array_prev_offset += array_size; + res_array_offsets[i] = res_array_prev_offset; + } + else + { + res_data.push_back(0); /// Пустая строка, включая ноль на конце. + + ++res_string_prev_offset; + res_string_offsets.push_back(res_string_prev_offset); + + ++res_array_prev_offset; + res_array_offsets[i] = res_array_prev_offset; + } + + src_array_prev_offset = src_array_offsets[i]; + + if (src_array_prev_offset) + src_string_prev_offset = src_string_offsets[src_array_prev_offset - 1]; + } + + return true; + } + else + return false; + } +}; + + struct NameHas { static constexpr auto name = "has"; }; struct NameIndexOf { static constexpr auto name = "indexOf"; }; struct NameCountEqual { static constexpr auto name = "countEqual"; }; -typedef FunctionArrayIndex FunctionHas; +typedef FunctionArrayIndex FunctionHas; typedef FunctionArrayIndex FunctionIndexOf; -typedef FunctionArrayIndex FunctionCountEqual; +typedef FunctionArrayIndex FunctionCountEqual; using FunctionEmptyArrayUInt8 = FunctionEmptyArray; using FunctionEmptyArrayUInt16 = FunctionEmptyArray; diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp index d30fbff1e7f..4210e77480a 100644 --- a/dbms/src/Functions/FunctionsArray.cpp +++ b/dbms/src/Functions/FunctionsArray.cpp @@ -27,6 +27,7 @@ void registerFunctionsArray(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); factory.registerFunction(); } diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference index 84910dae000..7b3ebbc7519 100644 --- a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.reference @@ -1,2 +1,5 @@ 1 1 1 +1 1 +('2015-01-02','Hello') +('2015-01-02','Hello') ('2015-01-02','Hello') 1 1 diff --git a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql index 71af1ad6a07..f924ff291ea 100644 --- a/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql +++ b/dbms/tests/queries/0_stateless/00205_scalar_subqueries.sql @@ -1,2 +1,5 @@ SELECT (SELECT (SELECT (SELECT (SELECT (SELECT count() FROM (SELECT * FROM system.numbers LIMIT 10)))))) = (SELECT 10), ((SELECT 1, 'Hello', [1, 2]).3)[1]; SELECT toUInt64((SELECT 9)) IN (SELECT number FROM system.numbers LIMIT 10); +SELECT (SELECT toDate('2015-01-02')) = toDate('2015-01-02'), 'Hello' = (SELECT 'Hello'); +SELECT (SELECT toDate('2015-01-02'), 'Hello'); +SELECT (SELECT toDate('2015-01-02'), 'Hello') AS x, x, identity((SELECT 1)), identity((SELECT 1) AS y); diff --git a/dbms/tests/queries/0_stateless/00206_empty_array_to_single.reference b/dbms/tests/queries/0_stateless/00206_empty_array_to_single.reference new file mode 100644 index 00000000000..e4e7e38fa36 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00206_empty_array_to_single.reference @@ -0,0 +1,14 @@ +[1,2] +[0] +[4,5,6] +[''] ['0000-00-00'] ['0000-00-00 00:00:00'] +[0] [''] ['0000-00-00 00:00:00'] ['0000-00-00'] +[0] ['0'] ['2015-01-01 00:00:00'] ['2015-01-01'] +[0,1] [''] ['2015-01-01 00:00:00','2015-01-01 00:00:01'] ['2015-01-01','2015-01-02'] +[0] ['0'] ['2015-01-01 00:00:00','2015-01-01 00:00:01','2015-01-01 00:00:02'] ['2015-01-01','2015-01-02','2015-01-03'] +[0] [''] ['2015-01-01 00:00:00','2015-01-01 00:00:01','2015-01-01 00:00:02','2015-01-01 00:00:03'] ['0000-00-00'] +[0,1] ['0'] ['0000-00-00 00:00:00'] ['2015-01-01'] +[0] [''] ['2015-01-01 00:00:00'] ['2015-01-01','2015-01-02'] +[0] ['0'] ['2015-01-01 00:00:00','2015-01-01 00:00:01'] ['2015-01-01','2015-01-02','2015-01-03'] +[0,1] [''] ['2015-01-01 00:00:00','2015-01-01 00:00:01','2015-01-01 00:00:02'] ['0000-00-00'] +[0] ['0'] ['2015-01-01 00:00:00','2015-01-01 00:00:01','2015-01-01 00:00:02','2015-01-01 00:00:03'] ['2015-01-01'] diff --git a/dbms/tests/queries/0_stateless/00206_empty_array_to_single.sql b/dbms/tests/queries/0_stateless/00206_empty_array_to_single.sql new file mode 100644 index 00000000000..0ad2975fa7f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00206_empty_array_to_single.sql @@ -0,0 +1,8 @@ +SELECT emptyArrayToSingle(arrayFilter(x -> x != 99, arrayJoin([[1, 2], [99], [4, 5, 6]]))); +SELECT emptyArrayToSingle(emptyArrayString()), emptyArrayToSingle(emptyArrayDate()), emptyArrayToSingle(emptyArrayDateTime()); + +SELECT + emptyArrayToSingle(range(number % 3)), + emptyArrayToSingle(arrayMap(x -> toString(x), range(number % 2))), + emptyArrayToSingle(arrayMap(x -> toDateTime('2015-01-01 00:00:00') + x, range(number % 5))), + emptyArrayToSingle(arrayMap(x -> toDate('2015-01-01') + x, range(number % 4))) FROM system.numbers LIMIT 10; From 0439ef5f7f141ecf0b2c35d4cc533b3f389fa94e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 13:54:45 +0300 Subject: [PATCH 23/88] dbms: implemented LEFT ARRAY JOIN [#METR-17474]. --- .../DB/Interpreters/ExpressionActions.h | 4 ++- dbms/include/DB/Parsers/ASTSelectQuery.h | 1 + dbms/src/Interpreters/ExpressionActions.cpp | 26 ++++++++++++++++--- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 4 +-- dbms/src/Parsers/ParserSelectQuery.cpp | 21 ++++++++++++--- dbms/src/Parsers/formatAST.cpp | 4 ++- .../00207_left_array_join.reference | 23 ++++++++++++++++ .../0_stateless/00207_left_array_join.sql | 2 ++ 8 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00207_left_array_join.reference create mode 100644 dbms/tests/queries/0_stateless/00207_left_array_join.sql diff --git a/dbms/include/DB/Interpreters/ExpressionActions.h b/dbms/include/DB/Interpreters/ExpressionActions.h index b4dedade097..39aed32e3c3 100644 --- a/dbms/include/DB/Interpreters/ExpressionActions.h +++ b/dbms/include/DB/Interpreters/ExpressionActions.h @@ -66,6 +66,7 @@ public: /// Для ARRAY_JOIN NameSet array_joined_columns; + bool array_join_is_left; /// Для JOIN const Join * join = nullptr; @@ -122,13 +123,14 @@ public: return a; } - static ExpressionAction arrayJoin(const NameSet & array_joined_columns) + static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left) { if (array_joined_columns.empty()) throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); ExpressionAction a; a.type = ARRAY_JOIN; a.array_joined_columns = array_joined_columns; + a.array_join_is_left = array_join_is_left; return a; } diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index f6edf7ebfb9..b941046c534 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -50,6 +50,7 @@ public: ASTPtr select_expression_list; ASTPtr database; ASTPtr table; /// Идентификатор, табличная функция или подзапрос (рекурсивно ASTSelectQuery) + bool array_join_is_left = false; /// LEFT ARRAY JOIN ASTPtr array_join_expression_list; /// ARRAY JOIN ASTPtr join; /// Обычный (не ARRAY) JOIN. bool final = false; diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index a227bb1c713..caa612434ce 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -268,6 +269,24 @@ void ExpressionAction::execute(Block & block) const if (!any_array) throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH); + /// Если LEFT ARRAY JOIN, то создаём столбцы, в которых пустые массивы заменены на массивы с одним элементом - значением по-умолчанию. + std::map non_empty_array_columns; + if (array_join_is_left) + { + for (const auto & name : array_joined_columns) + { + auto src_col = block.getByName(name); + + Block tmp_block{src_col, {{}, src_col.type, {}}}; + + FunctionEmptyArrayToSingle().execute(tmp_block, {0}, 1); + non_empty_array_columns[name] = tmp_block.getByPosition(1).column; + } + + any_array_ptr = non_empty_array_columns.begin()->second; + any_array = typeid_cast(&*any_array_ptr); + } + size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) { @@ -278,7 +297,8 @@ void ExpressionAction::execute(Block & block) const if (!typeid_cast(&*current.type)) throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH); - ColumnPtr array_ptr = current.column; + ColumnPtr array_ptr = array_join_is_left ? non_empty_array_columns[current.name] : current.column; + if (array_ptr->isConst()) array_ptr = dynamic_cast(*array_ptr).convertToFullColumn(); @@ -379,7 +399,7 @@ std::string ExpressionAction::toString() const break; case ARRAY_JOIN: - ss << "ARRAY JOIN "; + ss << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN "; for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it) { if (it != array_joined_columns.begin()) @@ -761,7 +781,7 @@ std::string ExpressionActions::getID() const ss << actions[i].result_name; if (actions[i].type == ExpressionAction::ARRAY_JOIN) { - ss << "{"; + ss << (actions[i].array_join_is_left ? "LEFT ARRAY JOIN" : "ARRAY JOIN") << "{"; for (NameSet::const_iterator it = actions[i].array_joined_columns.begin(); it != actions[i].array_joined_columns.end(); ++it) { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index c5a99bf5eb0..37bd9cc15a3 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1369,7 +1369,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl actions_stack.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name)); NameSet joined_columns; joined_columns.insert(result_name); - actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns)); + actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns, false)); } return; @@ -1666,7 +1666,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio result_columns.insert(result_source.first); } - actions->add(ExpressionAction::arrayJoin(result_columns)); + actions->add(ExpressionAction::arrayJoin(result_columns, select_query->array_join_is_left)); } bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types) diff --git a/dbms/src/Parsers/ParserSelectQuery.cpp b/dbms/src/Parsers/ParserSelectQuery.cpp index 97a171846d1..ee0908f185b 100644 --- a/dbms/src/Parsers/ParserSelectQuery.cpp +++ b/dbms/src/Parsers/ParserSelectQuery.cpp @@ -23,6 +23,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p ParserString s_select("SELECT", true, true); ParserString s_distinct("DISTINCT", true, true); ParserString s_from("FROM", true, true); + ParserString s_left("LEFT", true, true); ParserString s_array("ARRAY", true, true); ParserString s_join("JOIN", true, true); ParserString s_using("USING", true, true); @@ -166,8 +167,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p if (!parse_final_and_sample()) return false; - /// ARRAY JOIN expr list - if (s_array.ignore(pos, end, max_parsed_pos, expected)) + /// [LEFT] ARRAY JOIN expr list + Pos saved_pos = pos; + bool has_array_join = false; + if (s_left.ignore(pos, end, max_parsed_pos, expected) && ws.ignore(pos, end) && s_array.ignore(pos, end, max_parsed_pos, expected)) + { + select_query->array_join_is_left = true; + has_array_join = true; + } + else + { + pos = saved_pos; + if (s_array.ignore(pos, end, max_parsed_pos, expected)) + has_array_join = true; + } + + if (has_array_join) { ws.ignore(pos, end); @@ -182,7 +197,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p ws.ignore(pos, end); } - /// [GLOBAL] ANY|ALL INNER|LEFT JOIN (subquery) USING tuple + /// [GLOBAL] [ANY|ALL] INNER|LEFT|RIGHT|FULL|CROSS [OUTER] JOIN (subquery)|table_name USING tuple join.parse(pos, end, select_query->join, max_parsed_pos, expected); if (!parse_final_and_sample()) diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 68feeb8d518..5d0854827ef 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -163,7 +163,9 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo if (ast.array_join_expression_list) { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : ""); + s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str + << (ast.array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (hilite ? hilite_none : ""); + one_line ? formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line) : formatExpressionListMultiline(typeid_cast(*ast.array_join_expression_list), s, indent, hilite); diff --git a/dbms/tests/queries/0_stateless/00207_left_array_join.reference b/dbms/tests/queries/0_stateless/00207_left_array_join.reference new file mode 100644 index 00000000000..10ec6a7a16f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00207_left_array_join.reference @@ -0,0 +1,23 @@ +0 +1 +2 +2 +3 +4 +5 +5 +6 +7 +0 [] 0 +1 [0] 0 +2 [0,1] 0 +2 [0,1] 1 +3 [] 0 +4 [0] 0 +5 [0,1] 0 +5 [0,1] 1 +6 [] 0 +7 [0] 0 +8 [0,1] 0 +8 [0,1] 1 +9 [] 0 diff --git a/dbms/tests/queries/0_stateless/00207_left_array_join.sql b/dbms/tests/queries/0_stateless/00207_left_array_join.sql new file mode 100644 index 00000000000..8186054c250 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00207_left_array_join.sql @@ -0,0 +1,2 @@ +SELECT number FROM system.numbers LEFT ARRAY JOIN range(number % 3) AS arr LIMIT 10; +SELECT number, arr, x FROM (SELECT number, range(number % 3) AS arr FROM system.numbers LIMIT 10) LEFT ARRAY JOIN arr AS x; From c5570f3f898ec73d8a5f27b0a5e3e54ffb1b8ea3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 14:07:03 +0300 Subject: [PATCH 24/88] dbms: little better [#METR-17472]. --- dbms/src/Parsers/ParserJoin.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dbms/src/Parsers/ParserJoin.cpp b/dbms/src/Parsers/ParserJoin.cpp index 4cb7aeb89bb..3c55cf82f0b 100644 --- a/dbms/src/Parsers/ParserJoin.cpp +++ b/dbms/src/Parsers/ParserJoin.cpp @@ -30,7 +30,7 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p ParserString s_using("USING", true, true); ParserNotEmptyExpressionList exp_list; - ParserSubquery subquery; + ParserWithOptionalAlias subquery(ParserPtr(new ParserSubquery)); ParserIdentifier identifier; ws.ignore(pos, end); @@ -91,10 +91,6 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p ws.ignore(pos, end); - /// Может быть указан алиас. На данный момент, он ничего не значит и не используется. - ParserAlias().ignore(pos, end); - ws.ignore(pos, end); - if (join->kind != ASTJoin::Cross) { if (!s_using.ignore(pos, end, max_parsed_pos, expected)) From 38d6128150a8733ae17e72e45eb1fdfe545f76f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 14:34:41 +0300 Subject: [PATCH 25/88] dbms: added column sizes to system.columns table [#METR-17475]. --- .../DB/Storages/MergeTree/MergeTreeData.h | 9 +++- dbms/src/Storages/StorageSystemColumns.cpp | 54 ++++++++++++++++--- 2 files changed, 54 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeData.h b/dbms/include/DB/Storages/MergeTree/MergeTreeData.h index 5c814ecb696..47153ecc8f9 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeData.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeData.h @@ -779,6 +779,13 @@ public: return it == std::end(column_sizes) ? 0 : it->second; } + using ColumnSizes = std::unordered_map; + ColumnSizes getColumnSizes() const + { + Poco::ScopedLock lock{data_parts_mutex}; + return column_sizes; + } + /// Для ATTACH/DETACH/DROP PARTITION. static String getMonthName(const Field & partition); static DayNum_t getMonthDayNum(const Field & partition); @@ -810,7 +817,7 @@ private: NamesAndTypesListPtr columns; /// Актуальные размеры столбцов в сжатом виде - std::unordered_map column_sizes; + ColumnSizes column_sizes; BrokenPartCallback broken_part_callback; diff --git a/dbms/src/Storages/StorageSystemColumns.cpp b/dbms/src/Storages/StorageSystemColumns.cpp index 83ced0d22f4..b87872eec1a 100644 --- a/dbms/src/Storages/StorageSystemColumns.cpp +++ b/dbms/src/Storages/StorageSystemColumns.cpp @@ -1,6 +1,10 @@ #include +#include +#include +#include #include #include +#include #include #include @@ -15,7 +19,8 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "name", new DataTypeString }, { "type", new DataTypeString }, { "default_type", new DataTypeString }, - { "default_expression", new DataTypeString } + { "default_expression", new DataTypeString }, + { "bytes", new DataTypeUInt64 }, } { } @@ -103,6 +108,7 @@ BlockInputStreams StorageSystemColumns::read( ColumnPtr type_column = new ColumnString; ColumnPtr default_type_column = new ColumnString; ColumnPtr default_expression_column = new ColumnString; + ColumnPtr bytes_column = new ColumnUInt64; size_t rows = filtered_database_column->size(); for (size_t i = 0; i < rows; ++i) @@ -112,6 +118,7 @@ BlockInputStreams StorageSystemColumns::read( NamesAndTypesList columns; ColumnDefaults column_defaults; + std::unordered_map column_sizes; { StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); @@ -120,6 +127,26 @@ BlockInputStreams StorageSystemColumns::read( columns = storage->getColumnsList(); columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns)); column_defaults = storage->column_defaults; + + /** Данные о размерах столбцов для таблиц семейства MergeTree. + * NOTE: В дальнейшем можно сделать интерфейс, позволяющий получить размеры столбцов у IStorage. + */ + if (auto storage_concrete = dynamic_cast(storage.get())) + { + column_sizes = storage_concrete->getData().getColumnSizes(); + } + else if (auto storage_concrete = dynamic_cast(storage.get())) + { + column_sizes = storage_concrete->getData().getColumnSizes(); + + auto unreplicated_data = storage_concrete->getUnreplicatedData(); + if (unreplicated_data) + { + auto unreplicated_column_sizes = unreplicated_data->getColumnSizes(); + for (const auto & name_size : unreplicated_column_sizes) + column_sizes[name_size.first] += name_size.second; + } + } } for (const auto & column : columns) @@ -129,16 +156,26 @@ BlockInputStreams StorageSystemColumns::read( name_column->insert(column.name); type_column->insert(column.type->getName()); - const auto it = column_defaults.find(column.name); - if (it == std::end(column_defaults)) { - default_type_column->insertDefault(); - default_expression_column->insertDefault(); + const auto it = column_defaults.find(column.name); + if (it == std::end(column_defaults)) + { + default_type_column->insertDefault(); + default_expression_column->insertDefault(); + } + else + { + default_type_column->insert(toString(it->second.type)); + default_expression_column->insert(queryToString(it->second.expression)); + } } - else + { - default_type_column->insert(toString(it->second.type)); - default_expression_column->insert(queryToString(it->second.expression)); + const auto it = column_sizes.find(column.name); + if (it == std::end(column_sizes)) + bytes_column->insertDefault(); + else + bytes_column->insert(it->second); } } } @@ -151,6 +188,7 @@ BlockInputStreams StorageSystemColumns::read( block.insert(ColumnWithTypeAndName(type_column, new DataTypeString, "type")); block.insert(ColumnWithTypeAndName(default_type_column, new DataTypeString, "default_type")); block.insert(ColumnWithTypeAndName(default_expression_column, new DataTypeString, "default_expression")); + block.insert(ColumnWithTypeAndName(bytes_column, new DataTypeUInt64, "bytes")); return BlockInputStreams{ 1, new OneBlockInputStream(block) }; } From 99d64cbc7fcbc425c3de6c1682c3290213358c44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Jul 2015 15:06:29 +0300 Subject: [PATCH 26/88] Merge --- .../AggregateFunctionFactory.cpp | 16 ++++++++-------- .../0_stateless/00208_agg_state_merge.reference | 7 +++++++ .../0_stateless/00208_agg_state_merge.sql | 1 + 3 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00208_agg_state_merge.reference create mode 100644 dbms/tests/queries/0_stateless/00208_agg_state_merge.sql diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 4676d21bdda..245530b86d6 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -652,7 +652,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da AggregateFunctionPtr nested = get(String(name.data(), name.size() - strlen("State")), argument_types, recursion_level + 1); return new AggregateFunctionState(nested); } - else if (recursion_level == 0 && name.size() > strlen("Merge") && !(strcmp(name.data() + name.size() - strlen("Merge"), "Merge"))) + else if (recursion_level <= 1 && name.size() > strlen("Merge") && !(strcmp(name.data() + name.size() - strlen("Merge"), "Merge"))) { /// Для агрегатных функций вида aggMerge, где agg - имя другой агрегатной функции. if (argument_types.size() != 1) @@ -668,7 +668,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da return new AggregateFunctionMerge(nested); } - else if (recursion_level <= 1 && name.size() >= 3 && name[name.size() - 2] == 'I' && name[name.size() - 1] == 'f') + else if (recursion_level <= 2 && name.size() >= 3 && name[name.size() - 2] == 'I' && name[name.size() - 1] == 'f') { if (argument_types.empty()) throw Exception{ @@ -682,7 +682,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da AggregateFunctionPtr nested = get(String(name.data(), name.size() - 2), nested_dt, recursion_level + 1); return new AggregateFunctionIf(nested); } - else if (recursion_level <= 2 && name.size() > strlen("Array") && !(strcmp(name.data() + name.size() - strlen("Array"), "Array"))) + else if (recursion_level <= 3 && name.size() > strlen("Array") && !(strcmp(name.data() + name.size() - strlen("Array"), "Array"))) { /// Для агрегатных функций вида aggArray, где agg - имя другой агрегатной функции. size_t num_agruments = argument_types.size(); @@ -695,7 +695,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da else throw Exception("Illegal type " + argument_types[i]->getName() + " of argument #" + toString(i + 1) + " for aggregate function " + name + ". Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - AggregateFunctionPtr nested = get(String(name.data(), name.size() - strlen("Array")), nested_arguments, recursion_level + 2); /// + 2, чтобы ни один другой модификатор не мог идти перед Array + AggregateFunctionPtr nested = get(String(name.data(), name.size() - strlen("Array")), nested_arguments, recursion_level + 3); /// + 3, чтобы ни один другой модификатор не мог идти перед Array return new AggregateFunctionArray(nested); } else @@ -765,14 +765,14 @@ bool AggregateFunctionFactory::isAggregateFunctionName(const String & name, int if (recursion_level <= 0 && name.size() > strlen("State") && !(strcmp(name.data() + name.size() - strlen("State"), "State"))) return isAggregateFunctionName(String(name.data(), name.size() - strlen("State")), recursion_level + 1); /// Для агрегатных функций вида aggMerge, где agg - имя другой агрегатной функции. - if (recursion_level <= 0 && name.size() > strlen("Merge") && !(strcmp(name.data() + name.size() - strlen("Merge"), "Merge"))) + if (recursion_level <= 1 && name.size() > strlen("Merge") && !(strcmp(name.data() + name.size() - strlen("Merge"), "Merge"))) return isAggregateFunctionName(String(name.data(), name.size() - strlen("Merge")), recursion_level + 1); /// Для агрегатных функций вида aggIf, где agg - имя другой агрегатной функции. - if (recursion_level <= 1 && name.size() >= 3 && name[name.size() - 2] == 'I' && name[name.size() - 1] == 'f') + if (recursion_level <= 2 && name.size() >= 3 && name[name.size() - 2] == 'I' && name[name.size() - 1] == 'f') return isAggregateFunctionName(String(name.data(), name.size() - 2), recursion_level + 1); /// Для агрегатных функций вида aggArray, где agg - имя другой агрегатной функции. - if (recursion_level <= 2 && name.size() > strlen("Array") && !(strcmp(name.data() + name.size() - strlen("Array"), "Array"))) - return isAggregateFunctionName(String(name.data(), name.size() - strlen("Array")), recursion_level + 2); /// + 2, чтобы ни один другой модификатор не мог идти перед Array + if (recursion_level <= 3 && name.size() > strlen("Array") && !(strcmp(name.data() + name.size() - strlen("Array"), "Array"))) + return isAggregateFunctionName(String(name.data(), name.size() - strlen("Array")), recursion_level + 3); /// + 3, чтобы ни один другой модификатор не мог идти перед Array return false; } diff --git a/dbms/tests/queries/0_stateless/00208_agg_state_merge.reference b/dbms/tests/queries/0_stateless/00208_agg_state_merge.reference new file mode 100644 index 00000000000..d21a7aa01e4 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00208_agg_state_merge.reference @@ -0,0 +1,7 @@ +0 15 15 +1 14 14 +2 14 14 +3 15 15 +4 9 9 +5 9 9 +6 9 9 diff --git a/dbms/tests/queries/0_stateless/00208_agg_state_merge.sql b/dbms/tests/queries/0_stateless/00208_agg_state_merge.sql new file mode 100644 index 00000000000..3f30f66dd44 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00208_agg_state_merge.sql @@ -0,0 +1 @@ +SELECT k % 7 AS k2, finalizeAggregation(uniqMergeState(state)), uniqMerge(state) FROM (SELECT k, uniqState(x) AS state FROM (SELECT number % 11 AS k, intDiv(number, 7) AS x FROM system.numbers LIMIT 100) GROUP BY k) GROUP BY k2 ORDER BY k2; From bc6598371cf5c2380bc53b7c4b4b389f7b19120a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Jul 2015 16:36:22 +0300 Subject: [PATCH 27/88] dbms: fixed comment [#METR-2944]. --- dbms/include/DB/Storages/MergeTree/MergeTreeReader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h b/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h index e52d2d6a725..4e76ba89e89 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeReader.h @@ -247,7 +247,7 @@ private: ++right; } - /// Если правее засечек нет, просто используем DEFAULT_BUFFER_SIZE + /// Если правее засечек нет, просто используем max_read_buffer_size if (right >= (*marks).size() || (right + 1 == (*marks).size() && (*marks)[right].offset_in_compressed_file == (*marks)[all_mark_ranges[i].end].offset_in_compressed_file)) { From e3b5bc0ea6b85f1f9ad8bd01230b554a5878b9b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Jul 2015 18:51:37 +0300 Subject: [PATCH 28/88] dbms: fixed segfault when using INSERT SELECT or CREATE AS SELECT with extremes=1 or WITH TOTALS [#METR-17491]. --- dbms/include/DB/DataStreams/FormatFactory.h | 4 +-- dbms/src/Client/Client.cpp | 26 ++++++++++++------- dbms/src/Core/Block.cpp | 8 +++++- dbms/src/DataStreams/FormatFactory.cpp | 4 +-- .../Interpreters/InterpreterCreateQuery.cpp | 1 + .../Interpreters/InterpreterInsertQuery.cpp | 1 + 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/dbms/include/DB/DataStreams/FormatFactory.h b/dbms/include/DB/DataStreams/FormatFactory.h index d8e224e1cb0..848f84828af 100644 --- a/dbms/include/DB/DataStreams/FormatFactory.h +++ b/dbms/include/DB/DataStreams/FormatFactory.h @@ -14,10 +14,10 @@ class FormatFactory { public: BlockInputStreamPtr getInput(const String & name, ReadBuffer & buf, - Block & sample, size_t max_block_size) const; + const Block & sample, size_t max_block_size) const; BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, - Block & sample) const; + const Block & sample) const; }; } diff --git a/dbms/src/Client/Client.cpp b/dbms/src/Client/Client.cpp index f910e3b88a3..4c5a90b3796 100644 --- a/dbms/src/Client/Client.cpp +++ b/dbms/src/Client/Client.cpp @@ -860,15 +860,8 @@ private: } - void onData(Block & block) + void initBlockOutputStream(const Block & block) { - if (written_progress_chars) - clearProgress(); - - if (!block) - return; - - processed_rows += block.rows(); if (!block_std_out) { String current_format = format; @@ -891,8 +884,21 @@ private: block_std_out = context.getFormatFactory().getOutput(current_format, std_out, block); block_std_out->writePrefix(); } + } - /// Загаловочный блок с нулем строк использовался для инициализации block_std_out, + + void onData(Block & block) + { + if (written_progress_chars) + clearProgress(); + + if (!block) + return; + + processed_rows += block.rows(); + initBlockOutputStream(block); + + /// Заголовочный блок с нулем строк использовался для инициализации block_std_out, /// выводить его не нужно if (block.rows() != 0) { @@ -907,11 +913,13 @@ private: void onTotals(Block & block) { + initBlockOutputStream(block); block_std_out->setTotals(block); } void onExtremes(Block & block) { + initBlockOutputStream(block); block_std_out->setExtremes(block); } diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 7c827846980..fa164d12a18 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -302,7 +302,13 @@ std::string Block::dumpStructure() const { if (it != data.begin()) res << ", "; - res << it->name << ' ' << it->type->getName() << ' ' << it->column->getName() << ' ' << it->column->size(); + + res << it->name << ' ' << it->type->getName(); + + if (it->column) + res << ' ' << it->column->getName() << ' ' << it->column->size(); + else + res << "nullptr"; } return res.str(); } diff --git a/dbms/src/DataStreams/FormatFactory.cpp b/dbms/src/DataStreams/FormatFactory.cpp index ea82279d977..82d0b8dc913 100644 --- a/dbms/src/DataStreams/FormatFactory.cpp +++ b/dbms/src/DataStreams/FormatFactory.cpp @@ -26,7 +26,7 @@ namespace DB { BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & buf, - Block & sample, size_t max_block_size) const + const Block & sample, size_t max_block_size) const { if (name == "Native") return new NativeBlockInputStream(buf); @@ -48,7 +48,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf, - Block & sample) const + const Block & sample) const { if (name == "Native") return new NativeBlockOutputStream(buf); diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 969dd1c6562..a9eac811dbe 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -252,6 +252,7 @@ BlockIO InterpreterCreateQuery::executeImpl(bool assume_metadata_exists) if (create.select && storage_name != "View" && (storage_name != "MaterializedView" || create.is_populate)) { BlockIO io; + io.in_sample = select_sample; io.in = new NullAndDoCopyBlockInputStream( new MaterializingBlockInputStream(interpreter_select->execute().in), res->write(query_ptr)); diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index cd9da42f770..06bc5709614 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -100,6 +100,7 @@ BlockIO InterpreterInsertQuery::execute() InterpreterSelectQuery interpreter_select{query.select, context}; BlockInputStreamPtr in{interpreter_select.execute().in}; res.in = new NullAndDoCopyBlockInputStream{in, out}; + res.in_sample = interpreter_select.getSampleBlock(); } return res; From fc49f1ab55de01268d417977c3c8050b1dd90cc4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Jul 2015 19:57:02 +0300 Subject: [PATCH 29/88] dbms: added tests [#METR-17491]. --- .../00209_insert_select_extremes.reference | 11 +++++++++++ .../0_stateless/00209_insert_select_extremes.sql | 11 +++++++++++ .../00210_insert_select_extremes_http.reference | 3 +++ .../0_stateless/00210_insert_select_extremes_http.sh | 6 ++++++ 4 files changed, 31 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00209_insert_select_extremes.reference create mode 100644 dbms/tests/queries/0_stateless/00209_insert_select_extremes.sql create mode 100644 dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.reference create mode 100755 dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh diff --git a/dbms/tests/queries/0_stateless/00209_insert_select_extremes.reference b/dbms/tests/queries/0_stateless/00209_insert_select_extremes.reference new file mode 100644 index 00000000000..e86726625a1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00209_insert_select_extremes.reference @@ -0,0 +1,11 @@ + +1 +1 + +0 + +0 + +1 +1 +4 1 1 diff --git a/dbms/tests/queries/0_stateless/00209_insert_select_extremes.sql b/dbms/tests/queries/0_stateless/00209_insert_select_extremes.sql new file mode 100644 index 00000000000..0d632992b67 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00209_insert_select_extremes.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS test.test; +CREATE TABLE test.test (x UInt8) ENGINE = Log; + +INSERT INTO test.test SELECT 1 AS x; +INSERT INTO test.test SELECT 1 AS x SETTINGS extremes = 1; +INSERT INTO test.test SELECT 1 AS x GROUP BY 1 WITH TOTALS; +INSERT INTO test.test SELECT 1 AS x GROUP BY 1 WITH TOTALS SETTINGS extremes = 1; + +SELECT count(), min(x), max(x) FROM test.test; + +DROP TABLE test.test; diff --git a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.reference b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.reference new file mode 100644 index 00000000000..016f3290af0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.reference @@ -0,0 +1,3 @@ + +1 +1 diff --git a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh new file mode 100755 index 00000000000..c23a596b09d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +curl http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE IF EXISTS test.test" +curl http://localhost:8123/?extremes=1 -d @- <<< "CREATE TABLE test.test (x UInt8) ENGINE = Log" +curl http://localhost:8123/?extremes=1 -d @- <<< "INSERT INTO test.test SELECT 1 AS x" +curl http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE test.test" From f98ab0c9c1920a255cf7e5abf574aba496328b91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Jul 2015 20:36:52 +0300 Subject: [PATCH 30/88] dbms: allowed to cancel query while sending external tables data [#METR-15144]. --- .../DB/DataStreams/RemoteBlockInputStream.h | 49 +++++++++++++------ 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h index 30b044c5488..02ac9c225f4 100644 --- a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h +++ b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h @@ -84,6 +84,16 @@ public: if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) return; + { + std::lock_guard lock(external_tables_mutex); + + /// Останавливаем отправку внешних данных. + for (auto & vec : external_tables_data) + for (auto & elem : vec) + if (IProfilingBlockInputStream * stream = dynamic_cast(elem.first.get())) + stream->cancel(); + } + if (!isQueryPending() || hasThrownException()) return; @@ -107,27 +117,30 @@ protected: { size_t count = parallel_replicas->size(); - std::vector instances; - instances.reserve(count); - - for (size_t i = 0; i < count; ++i) { - ExternalTablesData res; - for (const auto & table : external_tables) + std::lock_guard lock(external_tables_mutex); + + external_tables_data.reserve(count); + + for (size_t i = 0; i < count; ++i) { - StoragePtr cur = table.second; - QueryProcessingStage::Enum stage = QueryProcessingStage::Complete; - DB::BlockInputStreams input = cur->read(cur->getColumnNamesList(), ASTPtr(), context, settings, - stage, DEFAULT_BLOCK_SIZE, 1); - if (input.size() == 0) - res.push_back(std::make_pair(new OneBlockInputStream(cur->getSampleBlock()), table.first)); - else - res.push_back(std::make_pair(input[0], table.first)); + ExternalTablesData res; + for (const auto & table : external_tables) + { + StoragePtr cur = table.second; + QueryProcessingStage::Enum stage = QueryProcessingStage::Complete; + DB::BlockInputStreams input = cur->read(cur->getColumnNamesList(), ASTPtr(), context, settings, + stage, DEFAULT_BLOCK_SIZE, 1); + if (input.size() == 0) + res.push_back(std::make_pair(new OneBlockInputStream(cur->getSampleBlock()), table.first)); + else + res.push_back(std::make_pair(input[0], table.first)); + } + external_tables_data.push_back(std::move(res)); } - instances.push_back(std::move(res)); } - parallel_replicas->sendExternalTablesData(instances); + parallel_replicas->sendExternalTablesData(external_tables_data); } Block readImpl() override @@ -302,6 +315,10 @@ private: QueryProcessingStage::Enum stage; Context context; + /// Потоки для чтения из временных таблиц - для последующей отправки данных на удалённые серверы для GLOBAL-подзапросов. + std::vector external_tables_data; + std::mutex external_tables_mutex; + /// Установили соединения с репликами, но ещё не отправили запрос. std::atomic established { false }; From 6bdcf3a9ae9a7bacb34ab094adc81856eca29623 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 27 Jul 2015 21:05:32 +0300 Subject: [PATCH 31/88] dbms: fixed test [#METR-2944]. --- .../0_stateless/00210_insert_select_extremes_http.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh index c23a596b09d..e9b82bccfa5 100755 --- a/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh +++ b/dbms/tests/queries/0_stateless/00210_insert_select_extremes_http.sh @@ -1,6 +1,6 @@ #!/bin/bash -curl http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE IF EXISTS test.test" -curl http://localhost:8123/?extremes=1 -d @- <<< "CREATE TABLE test.test (x UInt8) ENGINE = Log" -curl http://localhost:8123/?extremes=1 -d @- <<< "INSERT INTO test.test SELECT 1 AS x" -curl http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE test.test" +curl -sS http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE IF EXISTS test.test" +curl -sS http://localhost:8123/?extremes=1 -d @- <<< "CREATE TABLE test.test (x UInt8) ENGINE = Log" +curl -sS http://localhost:8123/?extremes=1 -d @- <<< "INSERT INTO test.test SELECT 1 AS x" +curl -sS http://localhost:8123/?extremes=1 -d @- <<< "DROP TABLE test.test" From 4da092418eabaecb0f40857654f0782da696b986 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 17:14:44 +0300 Subject: [PATCH 32/88] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/Common/HashTable/HashTable.h | 6 +++--- dbms/include/DB/Common/HashTable/SmallTable.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 2ce34905d69..0b216e1ca0e 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -465,7 +465,7 @@ public: if ((read_count == 0) || is_eof) throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); - return Cell::getKey(cell.getValue()); + return cell.getValue(); } private: @@ -803,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } @@ -827,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 521fe117845..c68963a4798 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -105,7 +105,7 @@ public: if ((read_count == 0) || is_eof) throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); - return Cell::getKey(cell.getValue()); + return cell.getValue(); } private: From 99e2b241c0cb179e0154f52fc11fc9387725cbf9 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 17:53:34 +0300 Subject: [PATCH 33/88] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 5 +---- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 5 ----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 146bb6a9394..a5a7b3f795d 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -193,10 +193,7 @@ namespace detail { static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) { - if (data.set.isMedium()) - data.set.insert(static_cast &>(column).getData()[row_num]); - else - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + data.set.insert(static_cast &>(column).getData()[row_num]); } }; diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index b7b3fb50552..e5cab187e68 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -204,11 +204,6 @@ public: throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } - bool isMedium() const - { - return getContainerType() == details::ContainerType::MEDIUM; - } - private: void toMedium() { From 98557ba369db99f4fc82e2b2db19e8df5dacb926 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:06:39 +0300 Subject: [PATCH 34/88] dbms: Server: Feed CombinedCardinalityEstimator with at most 32-bit wide values. [#METR-17276] --- .../AggregateFunctionUniq.h | 61 ++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index a5a7b3f795d..44d4d7bc4b2 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -122,7 +122,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +132,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19>; Set set; static String getName() { return "uniqCombined"; } @@ -140,6 +140,60 @@ struct AggregateFunctionUniqCombinedData namespace detail { + template + struct Hash64To32; + + template + struct Hash64To32::value || std::is_same::value>::type> + { + static UInt32 compute(T key) + { + using U = typename std::make_unsigned::type; + auto x = static_cast(key); + + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } + }; + + template + struct CombinedCardinalityTraits + { + static UInt32 hash(T key) + { + return key; + } + }; + + template + struct CombinedCardinalityTraits::value || std::is_same::value>::type> + { + using Op = Hash64To32; + + static UInt32 hash(T key) + { + return Op::compute(key); + }; + }; + + template + struct CombinedCardinalityTraits::value>::type> + { + using Op = Hash64To32; + + static UInt32 hash(T key) + { + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } + }; + /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. * Используется для частичной специализации для добавления строк. */ @@ -193,7 +247,8 @@ namespace detail { static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) { - data.set.insert(static_cast &>(column).getData()[row_num]); + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); } }; From b17d3d79fc493cc6fdfbf06142f24cfc5bd534e7 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:13:02 +0300 Subject: [PATCH 35/88] dbms: Server: Updated comment. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 44d4d7bc4b2..d61e99c2c86 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -146,6 +146,7 @@ namespace detail template struct Hash64To32::value || std::is_same::value>::type> { + /// https://gist.github.com/badboy/6267743 static UInt32 compute(T key) { using U = typename std::make_unsigned::type; From 3d301bb74939818e648c8803b3a413f73dd4254a Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:22:17 +0300 Subject: [PATCH 36/88] dbms: Server: Use trivial has for HLL12 case. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index d61e99c2c86..20b804d08a5 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -122,7 +122,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +132,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } From 3251f5978a4221d8f2186904c08cd44ed1c1d364 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Jul 2015 20:04:49 +0300 Subject: [PATCH 37/88] dbms: Server: Made HyperLogLog implementation more understandable. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index e5cab187e68..00a01232b31 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -87,8 +87,6 @@ public: } else if (container_type == details::ContainerType::LARGE) getContainer().insert(value); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const @@ -151,8 +149,6 @@ public: toLarge(); getContainer().read(in); } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) @@ -200,8 +196,6 @@ public: getContainer().write(out); else if (container_type == details::ContainerType::LARGE) getContainer().write(out); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } private: @@ -217,12 +211,8 @@ private: new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - std::atomic_signal_fence(std::memory_order_seq_cst); - setContainerType(details::ContainerType::MEDIUM); - std::atomic_signal_fence(std::memory_order_seq_cst); - if (current_memory_tracker) current_memory_tracker->alloc(sizeof(medium)); } @@ -251,12 +241,8 @@ private: new (&large) std::unique_ptr{ std::move(tmp_large) }; - std::atomic_signal_fence(std::memory_order_seq_cst); - setContainerType(details::ContainerType::LARGE); - std::atomic_signal_fence(std::memory_order_seq_cst); - if (current_memory_tracker) current_memory_tracker->alloc(sizeof(large)); From 5b0b5dc9cf455fbe11b46f8b8628a898a4fc08d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 31 Jul 2015 02:41:02 +0300 Subject: [PATCH 38/88] dbms: added memory-efficient mode of distributed aggregation [#METR-17536]. --- ...ggregatedMemoryEfficientBlockInputStream.h | 110 ++++++++++++++++++ .../DB/DataStreams/RemoteBlockInputStream.h | 38 ++++-- dbms/include/DB/Interpreters/Aggregator.h | 28 +++-- dbms/include/DB/Interpreters/Settings.h | 2 + dbms/src/Core/Block.cpp | 11 +- dbms/src/Interpreters/Aggregator.cpp | 62 ++++++++++ .../Interpreters/InterpreterSelectQuery.cpp | 35 +++++- 7 files changed, 257 insertions(+), 29 deletions(-) create mode 100644 dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h diff --git a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h new file mode 100644 index 00000000000..cbe117dbcdb --- /dev/null +++ b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h @@ -0,0 +1,110 @@ +#pragma once + +#include +#include + + +namespace DB +{ + + +/** Доагрегирует потоки блоков, держа в оперативной памяти только по одному блоку из каждого потока. + * Это экономит оперативку в случае использования двухуровневой агрегации, где в каждом потоке будет до 256 блоков с частями результата. + * + * Агрегатные функции в блоках не должны быть финализированы, чтобы их состояния можно было объединить. + */ +class MergingAggregatedMemoryEfficientBlockInputStream : public IProfilingBlockInputStream +{ +public: + MergingAggregatedMemoryEfficientBlockInputStream(BlockInputStreams inputs_, const Names & keys_names_, + const AggregateDescriptions & aggregates_, bool overflow_row_, bool final_) + : aggregator(keys_names_, aggregates_, overflow_row_, 0, OverflowMode::THROW, nullptr, 0, 0), + final(final_) + { + children = inputs_; + current_blocks.resize(children.size()); + overflow_blocks.resize(children.size()); + is_exhausted.resize(children.size()); + } + + String getName() const override { return "MergingAggregatedMemorySavvy"; } + + String getID() const override + { + std::stringstream res; + res << "MergingAggregatedMemorySavvy(" << aggregator.getID(); + for (size_t i = 0, size = children.size(); i < size; ++i) + res << ", " << children.back()->getID(); + res << ")"; + return res.str(); + } + +protected: + Block readImpl() override + { + /// Если child - RemoteBlockInputStream, то отправляет запрос на все удалённые серверы, инициируя вычисления. + if (current_bucket_num == -1) + for (auto & child : children) + child->readPrefix(); + + /// Всё прочитали. + if (current_bucket_num > 255) + return {}; + + /// Читаем следующие блоки для current_bucket_num + for (size_t i = 0, size = children.size(); i < size; ++i) + { + while (!is_exhausted[i] && (!current_blocks[i] || current_blocks[i].info.bucket_num < current_bucket_num)) + { + current_blocks[i] = children[i]->read(); + + if (!current_blocks[i]) + { + is_exhausted[i] = true; + } + else if (current_blocks[i].info.is_overflows) + { + overflow_blocks[i].swap(current_blocks[i]); + } + } + } + + /// Может быть, нет блоков для current_bucket_num, а все блоки имеют больший bucket_num. + Int32 min_bucket_num = 256; + for (size_t i = 0, size = children.size(); i < size; ++i) + if (!is_exhausted[i] && current_blocks[i].info.bucket_num < min_bucket_num) + min_bucket_num = current_blocks[i].info.bucket_num; + + current_bucket_num = min_bucket_num; + + /// Все потоки исчерпаны. + if (current_bucket_num > 255) + return {}; /// TODO overflow_blocks. + + /// TODO Если есть single_level и two_level блоки. + + /// Объединяем все блоки с current_bucket_num. + + BlocksList blocks_to_merge; + for (size_t i = 0, size = children.size(); i < size; ++i) + if (current_blocks[i].info.bucket_num == current_bucket_num) + blocks_to_merge.emplace_back(std::move(current_blocks[i])); + + Block res = aggregator.mergeBlocks(blocks_to_merge, final); + + ++current_bucket_num; + return res; + } + +private: + Aggregator aggregator; + bool final; + + Int32 current_bucket_num = -1; + std::vector current_blocks; + std::vector is_exhausted; + + std::vector overflow_blocks; +}; + +} diff --git a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h index 02ac9c225f4..da58b0b158f 100644 --- a/dbms/include/DB/DataStreams/RemoteBlockInputStream.h +++ b/dbms/include/DB/DataStreams/RemoteBlockInputStream.h @@ -111,6 +111,14 @@ public: parallel_replicas->disconnect(); } + + /// Отправляет запрос (инициирует вычисления) раньше, чем read. + void readPrefix() override + { + if (!sent_query) + sendQuery(); + } + protected: /// Отправить на удаленные реплики все временные таблицы void sendExternalTables() @@ -147,19 +155,10 @@ protected: { if (!sent_query) { - createParallelReplicas(); + sendQuery(); if (settings.skip_unavailable_shards && 0 == parallel_replicas->size()) - return Block(); - - established = true; - - parallel_replicas->sendQuery(query, "", stage, true); - - established = false; - sent_query = true; - - sendExternalTables(); + return {}; } while (true) @@ -280,6 +279,23 @@ protected: } private: + void sendQuery() + { + createParallelReplicas(); + + if (settings.skip_unavailable_shards && 0 == parallel_replicas->size()) + return; + + established = true; + + parallel_replicas->sendQuery(query, "", stage, true); + + established = false; + sent_query = true; + + sendExternalTables(); + } + /// ITable::read requires a Context, therefore we should create one if the user can't supply it static Context & getDefaultContext() { diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 5459c06b82b..65fbdce1900 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -674,17 +674,6 @@ typedef SharedPtr AggregatedDataVariantsPtr; typedef std::vector ManyAggregatedDataVariants; -/** Достать вариант агрегации по его типу. */ -template Method & getDataVariant(AggregatedDataVariants & variants); - -#define M(NAME, IS_TWO_LEVEL) \ - template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } - -APPLY_FOR_AGGREGATED_VARIANTS(M) - -#undef M - - /** Агрегирует источник блоков. */ class Aggregator @@ -733,11 +722,15 @@ public: */ AggregatedDataVariantsPtr merge(ManyAggregatedDataVariants & data_variants, size_t max_threads); - /** Объединить несколько агрегированных блоков в одну структуру данных. + /** Объединить поток частично агрегированных блоков в одну структуру данных. * (Доагрегировать несколько блоков, которые представляют собой результат независимых агрегаций с удалённых серверов.) */ void mergeStream(BlockInputStreamPtr stream, AggregatedDataVariants & result, size_t max_threads); + /** Объединить несколько частично агрегированных блоков в один. + */ + Block mergeBlocks(BlocksList & blocks, bool final); + using CancellationHook = std::function; /** Установить функцию, которая проверяет, можно ли прервать текущую задачу. @@ -974,4 +967,15 @@ protected: }; +/** Достать вариант агрегации по его типу. */ +template Method & getDataVariant(AggregatedDataVariants & variants); + +#define M(NAME, IS_TWO_LEVEL) \ + template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } + +APPLY_FOR_AGGREGATED_VARIANTS(M) + +#undef M + + } diff --git a/dbms/include/DB/Interpreters/Settings.h b/dbms/include/DB/Interpreters/Settings.h index 2bc4c0bca4a..36d4fc9db6a 100644 --- a/dbms/include/DB/Interpreters/Settings.h +++ b/dbms/include/DB/Interpreters/Settings.h @@ -91,6 +91,8 @@ struct Settings M(SettingUInt64, min_count_to_compile, 3) \ /** При каком количестве ключей, начинает использоваться двухуровневая агрегация. 0 - никогда не использовать. */ \ M(SettingUInt64, group_by_two_level_threshold, 100000) \ + /** Включён ли экономный по памяти режим распределённой агрегации. */ \ + M(SettingBool, distributed_aggregation_memory_efficient, false) \ \ /** Максимальное количество используемых реплик каждого шарда при выполнении запроса */ \ M(SettingUInt64, max_parallel_replicas, 1) \ diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index fa164d12a18..e51fe1141d6 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -141,9 +141,12 @@ void Block::insertUnique(const ColumnWithTypeAndName & elem) void Block::erase(size_t position) { + if (index_by_position.empty()) + throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + if (position >= index_by_position.size()) throw Exception("Position out of bound in Block::erase(), max position = " - + toString(index_by_position.size()), ErrorCodes::POSITION_OUT_OF_BOUND); + + toString(index_by_position.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); Container_t::iterator it = index_by_position[position]; index_by_name.erase(index_by_name.find(it->name)); @@ -177,6 +180,9 @@ void Block::erase(const String & name) ColumnWithTypeAndName & Block::getByPosition(size_t position) { + if (index_by_position.empty()) + throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + if (position >= index_by_position.size()) throw Exception("Position " + toString(position) + " is out of bound in Block::getByPosition(), max position = " @@ -189,6 +195,9 @@ ColumnWithTypeAndName & Block::getByPosition(size_t position) const ColumnWithTypeAndName & Block::getByPosition(size_t position) const { + if (index_by_position.empty()) + throw Exception("Block is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + if (position >= index_by_position.size()) throw Exception("Position " + toString(position) + " is out of bound in Block::getByPosition(), max position = " diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 74bffca822a..f21656a54de 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -1688,6 +1689,66 @@ void Aggregator::mergeStream(BlockInputStreamPtr stream, AggregatedDataVariants } +Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) +{ + if (blocks.empty()) + return {}; + + StringRefs key(keys_size); + ConstColumnPlainPtrs key_columns(keys_size); + + AggregateColumnsData aggregate_columns(aggregates_size); + + initialize(blocks.front()); + + /// Каким способом выполнять агрегацию? + for (size_t i = 0; i < keys_size; ++i) + key_columns[i] = sample.getByPosition(i).column; + + Sizes key_sizes; + AggregatedDataVariants::Type method = chooseAggregationMethod(key_columns, key_sizes); + + /// Временные данные для агрегации. + AggregatedDataVariants result; + + /// result будет уничтожать состояния агрегатных функций в деструкторе + result.aggregator = this; + + result.init(method); + result.keys_size = keys_size; + result.key_sizes = key_sizes; + + LOG_TRACE(log, "Merging partially aggregated blocks."); + + for (Block & block : blocks) + { + if (result.type == AggregatedDataVariants::Type::without_key || block.info.is_overflows) + mergeWithoutKeyStreamsImpl(block, result); + + #define M(NAME, IS_TWO_LEVEL) \ + else if (result.type == AggregatedDataVariants::Type::NAME) \ + mergeStreamsImpl(block, key_sizes, result.aggregates_pool, *result.NAME, result.NAME->data); + + APPLY_FOR_AGGREGATED_VARIANTS(M) + #undef M + else if (result.type != AggregatedDataVariants::Type::without_key) + throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + } + + BlocksList merged_block = convertToBlocks(result, final, 1); + + if (merged_block.size() > 1) /// TODO overflows + throw Exception("Logical error: temporary result is not single-level", ErrorCodes::LOGICAL_ERROR); + + LOG_TRACE(log, "Merged partially aggregated blocks."); + + if (merged_block.empty()) + return {}; + + return merged_block.front(); +} + + template void NO_INLINE Aggregator::destroyImpl( Method & method) const @@ -1769,4 +1830,5 @@ void Aggregator::setCancellationHook(const CancellationHook cancellation_hook) isCancelled = cancellation_hook; } + } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index a6c66a015d3..6272434ee48 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -856,14 +857,38 @@ void InterpreterSelectQuery::executeAggregation(ExpressionActionsPtr expression, void InterpreterSelectQuery::executeMergeAggregated(bool overflow_row, bool final) { - /// Склеим несколько источников в один - executeUnion(); - - /// Теперь объединим агрегированные блоки Names key_names; AggregateDescriptions aggregates; query_analyzer->getAggregateInfo(key_names, aggregates); - streams[0] = new MergingAggregatedBlockInputStream(streams[0], key_names, aggregates, overflow_row, final, original_max_threads); + + /** Есть два режима распределённой агрегации. + * + * 1. В разных потоках читать из удалённых серверов блоки. + * Сохранить все блоки в оперативку. Объединить блоки. + * Если агрегация двухуровневая - распараллелить по номерам корзин. + * + * 2. В одном потоке читать по очереди блоки с разных серверов. + * В оперативке хранится только по одному блоку с каждого сервера. + * Если агрегация двухуровневая - последовательно объединяем блоки каждого следующего уровня. + * + * Второй вариант расходует меньше памяти (до 256 раз меньше) + * в случае двухуровневой агрегации, которая используется для больших результатов после GROUP BY, + * но при этом может работать медленнее. + */ + + if (!settings.distributed_aggregation_memory_efficient) + { + /// Склеим несколько источников в один, распараллеливая работу. + executeUnion(); + + /// Теперь объединим агрегированные блоки + streams[0] = new MergingAggregatedBlockInputStream(streams[0], key_names, aggregates, overflow_row, final, original_max_threads); + } + else + { + streams[0] = new MergingAggregatedMemoryEfficientBlockInputStream(streams, key_names, aggregates, overflow_row, final); + streams.resize(1); + } } From b32721432bc78bd7077b4414319418304afd2b39 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 3 Aug 2015 20:32:37 +0300 Subject: [PATCH 39/88] dbms: Server: Added stateless functional test (stateful one is in progress). [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 416 ++++++++++++++++++ .../00211_aggregate_function_uniq.sql | 35 ++ 2 files changed, 451 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference new file mode 100644 index 00000000000..64f3c19bb38 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -0,0 +1,416 @@ +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 159 +1 164 +3 165 +6 162 +7 160 +9 164 +10 81 +11 158 +13 161 +14 160 +17 163 +19 164 +20 159 +21 161 +22 159 +26 160 +31 164 +35 160 +36 161 +0 54571 +1 55013 +3 52912 +6 52353 +7 54011 +9 54138 +10 26870 +11 54554 +13 53951 +14 53396 +17 55227 +19 55115 +20 54370 +21 54268 +22 54620 +26 53394 +31 54151 +35 54328 +36 52997 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 161 +0.125 160 +0.5 164 +0.05 164 +0.143 162 +0.091 81 +0.056 163 +0.048 159 +0.083 158 +0.25 165 +1 159 +0.1 164 +0.028 160 +0.027 161 +0.031 164 +0.067 160 +0.043 159 +0.037 160 +0.071 161 +0.045 54268 +0.125 54011 +0.5 55013 +0.05 55115 +0.143 52353 +0.091 26870 +0.056 55227 +0.048 54370 +0.083 54554 +0.25 52912 +1 54571 +0.1 54138 +0.028 54328 +0.027 52997 +0.031 54151 +0.067 53396 +0.043 54620 +0.037 53394 +0.071 53951 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 164 +0.05 164 +0.25 165 +0.048 159 +0.091 81 +0.043 159 +0.071 161 +0.083 158 +0.125 160 +0.031 164 +0.143 162 +0.028 160 +0.067 160 +0.045 161 +0.027 161 +0.056 163 +0.037 160 +0.1 164 +1 159 +0.5 55013 +0.05 55115 +0.25 52912 +0.048 54370 +0.091 26870 +0.043 54620 +0.071 53951 +0.083 54554 +0.125 54011 +0.031 54151 +0.143 52353 +0.028 54328 +0.067 53396 +0.045 54268 +0.027 52997 +0.056 55227 +0.037 53394 +0.1 54138 +1 54571 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 158 +3 162 +6 163 +7 162 +9 162 +10 79 +11 162 +13 163 +14 160 +17 163 +19 158 +20 162 +21 157 +22 164 +26 162 +31 161 +35 162 +36 163 +0 54029 +1 53772 +3 53540 +6 54012 +7 53910 +9 52761 +10 26462 +11 52701 +13 54505 +14 53790 +17 54064 +19 55420 +20 56686 +21 52639 +22 54251 +26 53827 +31 53574 +35 55022 +36 53961 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54053 +6 54054 +7 54054 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54053 +22 54054 +26 54054 +31 54054 +35 54054 +36 54053 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54053 +0.125 54054 +0.5 54054 +0.05 54054 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54054 +0.083 54055 +0.25 54053 +1 54054 +0.1 54053 +0.028 54054 +0.027 54053 +0.031 54054 +0.067 54054 +0.043 54054 +0.037 54054 +0.071 54054 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54054 +0.25 54053 +0.048 54054 +0.091 27027 +0.043 54054 +0.071 54054 +0.083 54055 +0.125 54054 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54053 +0.027 54053 +0.056 54054 +0.037 54054 +0.1 54053 +1 54054 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql new file mode 100644 index 00000000000..2886daeb3b3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql @@ -0,0 +1,35 @@ +/* uniqHLL12 */ + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +/* uniqCombined */ + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From 70f4887d747620d53fe7bb842578adac39ed0efb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 3 Aug 2015 23:11:57 +0300 Subject: [PATCH 40/88] dbms: tiny modifications [#METR-2944]. --- dbms/src/Core/Block.cpp | 2 +- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index fa164d12a18..d14cec43222 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -308,7 +308,7 @@ std::string Block::dumpStructure() const if (it->column) res << ' ' << it->column->getName() << ' ' << it->column->size(); else - res << "nullptr"; + res << " nullptr"; } return res.str(); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index a6c66a015d3..9430247bafd 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -403,7 +403,7 @@ void InterpreterSelectQuery::executeSingleQuery() bool has_having = false; bool has_order_by = false; - ExpressionActionsPtr before_join; + ExpressionActionsPtr before_join; /// включая JOIN ExpressionActionsPtr before_where; ExpressionActionsPtr before_aggregation; ExpressionActionsPtr before_having; From 99aec7b6c132aa86e2c70216dd49c93fa7585bc3 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 14:22:13 +0300 Subject: [PATCH 41/88] dbms: Server: Fixed timeout value. [#METR-14410] --- dbms/src/Client/ParallelReplicas.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dbms/src/Client/ParallelReplicas.cpp b/dbms/src/Client/ParallelReplicas.cpp index 135c1b06aa0..538d4cf2caf 100644 --- a/dbms/src/Client/ParallelReplicas.cpp +++ b/dbms/src/Client/ParallelReplicas.cpp @@ -263,9 +263,8 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() Poco::Net::Socket::SocketList read_list; read_list.reserve(active_replica_count); - /** Сначала проверяем, есть ли данные, которые уже лежат в буфере - * хоть одного соединения. - */ + /// Сначала проверяем, есть ли данные, которые уже лежат в буфере + /// хоть одного соединения. for (auto & e : replica_map) { Connection * connection = e.second; @@ -273,9 +272,8 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() read_list.push_back(connection->socket); } - /** Если не было найдено никаких данных, то проверяем, есть ли соединения - * готовые для чтения. - */ + /// Если не было найдено никаких данных, то проверяем, есть ли соединения + /// готовые для чтения. if (read_list.empty()) { Poco::Net::Socket::SocketList write_list; @@ -287,7 +285,7 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() if (connection != nullptr) read_list.push_back(connection->socket); } - int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->poll_interval * 1000000); + int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->receive_timeout); if (n == 0) return replica_map.end(); } From 58e2a7ef6b7fb6d1b81775c1339020ecf6e6d970 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 15:33:08 +0300 Subject: [PATCH 42/88] dbms: Server: Removed useless hashing in the case of 32-bit floating point values. [#METR-17276] --- .../DB/AggregateFunctions/AggregateFunctionUniq.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 20b804d08a5..c581a811956 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -183,7 +183,7 @@ namespace detail }; template - struct CombinedCardinalityTraits::value>::type> + struct CombinedCardinalityTraits::value>::type> { using Op = Hash64To32; @@ -195,6 +195,17 @@ namespace detail } }; + template + struct CombinedCardinalityTraits::value>::type> + { + static UInt32 hash(T key) + { + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } + }; + /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. * Используется для частичной специализации для добавления строк. */ From 891aed0744729a6f45ec3a519d9c13162795a762 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 16:37:56 +0300 Subject: [PATCH 43/88] dbms: Server: Cosmetic changes. [#METR-17276] --- .../AggregateFunctionUniq.h | 280 +++++++++--------- 1 file changed, 145 insertions(+), 135 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index c581a811956..08f28a14be8 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -25,32 +25,7 @@ namespace DB { - -template struct AggregateFunctionUniqTraits -{ - static UInt64 hash(T x) { return x; } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float32 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float64 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - +/// uniq struct AggregateFunctionUniqUniquesHashSetData { @@ -60,6 +35,7 @@ struct AggregateFunctionUniqUniquesHashSetData static String getName() { return "uniq"; } }; +/// uniqHLL12 template struct AggregateFunctionUniqHLL12Data @@ -79,6 +55,7 @@ struct AggregateFunctionUniqHLL12Data static String getName() { return "uniqHLL12"; } }; +/// uniqExact template struct AggregateFunctionUniqExactData @@ -140,139 +117,172 @@ struct AggregateFunctionUniqCombinedData namespace detail { - template - struct Hash64To32; - template - struct Hash64To32::value || std::is_same::value>::type> +/** Хэширование 64-битных целочисленных значений в 32-битные. + * Источник: https://gist.github.com/badboy/6267743 + */ +template +struct Hash64To32; + +template +struct Hash64To32::value || std::is_same::value>::type> +{ + static UInt32 compute(T key) { - /// https://gist.github.com/badboy/6267743 - static UInt32 compute(T key) - { - using U = typename std::make_unsigned::type; - auto x = static_cast(key); + using U = typename std::make_unsigned::type; + auto x = static_cast(key); - x = (~x) + (x << 18); - x = x ^ (x >> 31); - x = x * 21; - x = x ^ (x >> 11); - x = x + (x << 6); - x = x ^ (x >> 22); - return static_cast(x); - } - }; + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } +}; - template - struct CombinedCardinalityTraits +/** Хэш-функция для uniqCombined. + */ +template +struct CombinedCardinalityTraits +{ + static UInt32 hash(T key) { - static UInt32 hash(T key) - { - return key; - } - }; + return key; + } +}; - template - struct CombinedCardinalityTraits::value || std::is_same::value>::type> +template +struct CombinedCardinalityTraits::value || std::is_same::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - using Op = Hash64To32; - - static UInt32 hash(T key) - { - return Op::compute(key); - }; + return Op::compute(key); }; +}; - template - struct CombinedCardinalityTraits::value>::type> +template +struct CombinedCardinalityTraits::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - using Op = Hash64To32; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } +}; - static UInt32 hash(T key) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return Op::compute(res); - } - }; - - template - struct CombinedCardinalityTraits::value>::type> +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) { - static UInt32 hash(T key) - { - UInt32 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return res; - } - }; + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } +}; - /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. - * Используется для частичной специализации для добавления строк. - */ - template - struct OneAdder +/** Хэш-функция для uniq. + */ +template struct AggregateFunctionUniqTraits +{ + static UInt64 hash(T x) { return x; } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float32 x) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; - template - struct OneAdder +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float64 x) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - /// Имейте ввиду, что вычисление приближённое. - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; - template - struct OneAdder > +/** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. + * Используется для частичной специализации для добавления строк. + */ +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - data.set.insert(static_cast &>(column).getData()[row_num]); - } - }; + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } +}; - template<> - struct OneAdder > +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); + /// Имейте ввиду, что вычисление приближённое. + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; - UInt128 key; - SipHash hash; - hash.update(value.data, value.size); - hash.get128(key.first, key.second); - - data.set.insert(key); - } - }; - - template - struct OneAdder > +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - const auto & value = static_cast &>(column).getData()[row_num]; - data.set.insert(CombinedCardinalityTraits::hash(value)); - } - }; + data.set.insert(static_cast &>(column).getData()[row_num]); + } +}; - template<> - struct OneAdder > +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + StringRef value = column.getDataAt(row_num); + + UInt128 key; + SipHash hash; + hash.update(value.data, value.size); + hash.get128(key.first, key.second); + + data.set.insert(key); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + } From e9c504e9da30523569ba781e4b5ae3f5fbea2756 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Aug 2015 21:55:53 +0300 Subject: [PATCH 44/88] dbms: modified exception message for ParallelReplicas [#MTRSADMIN-1462]. --- dbms/src/Client/ParallelReplicas.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbms/src/Client/ParallelReplicas.cpp b/dbms/src/Client/ParallelReplicas.cpp index 538d4cf2caf..0702482f4c9 100644 --- a/dbms/src/Client/ParallelReplicas.cpp +++ b/dbms/src/Client/ParallelReplicas.cpp @@ -214,7 +214,7 @@ Connection::Packet ParallelReplicas::receivePacketUnlocked() auto it = getReplicaForReading(); if (it == replica_map.end()) - throw Exception("No available replica", ErrorCodes::NO_AVAILABLE_REPLICA); + throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA); Connection * connection = it->second; Connection::Packet packet = connection->receivePacket(); @@ -285,9 +285,17 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() if (connection != nullptr) read_list.push_back(connection->socket); } + int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->receive_timeout); + if (n == 0) - return replica_map.end(); + { + std::stringstream description; + for (auto it = replica_map.begin(); it != replica_map.end(); ++it) + description << (it != replica_map.begin() ? ", " : "") << it->second->getDescription(); + + throw Exception("Timeout exceeded while reading from " + description.str(), ErrorCodes::TIMEOUT_EXCEEDED); + } } auto & socket = read_list[rand() % read_list.size()]; From 4ab00524564fa94c3f3f991a21e38539ae294396 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 00:38:31 +0300 Subject: [PATCH 45/88] dbms: more compact formatting of queries with aliases: development [#METR-17606]. --- dbms/include/DB/Parsers/ASTAlterQuery.h | 84 ++++++++++ dbms/include/DB/Parsers/ASTAsterisk.h | 6 + dbms/include/DB/Parsers/ASTCheckQuery.h | 22 +++ .../include/DB/Parsers/ASTColumnDeclaration.h | 20 +++ dbms/include/DB/Parsers/ASTCreateQuery.h | 67 ++++++++ dbms/include/DB/Parsers/ASTDropQuery.h | 19 +++ dbms/include/DB/Parsers/ASTExpressionList.h | 34 ++++ dbms/include/DB/Parsers/ASTFunction.h | 3 + dbms/include/DB/Parsers/ASTIdentifier.h | 23 +++ dbms/include/DB/Parsers/ASTInsertQuery.h | 37 +++++ dbms/include/DB/Parsers/ASTJoin.h | 31 ++++ dbms/include/DB/Parsers/ASTLiteral.h | 16 ++ dbms/include/DB/Parsers/ASTNameTypePair.h | 9 ++ dbms/include/DB/Parsers/ASTOptimizeQuery.h | 7 + dbms/include/DB/Parsers/ASTOrderByElement.h | 12 ++ .../DB/Parsers/ASTQueryWithTableAndOutput.h | 22 +-- dbms/include/DB/Parsers/ASTRenameQuery.h | 16 ++ dbms/include/DB/Parsers/ASTSelectQuery.h | 3 + dbms/include/DB/Parsers/IAST.h | 64 ++++++++ dbms/include/DB/Parsers/formatAST.h | 2 +- dbms/src/Parsers/ASTSelectQuery.cpp | 148 ++++++++++++++++++ dbms/src/Parsers/formatAST.cpp | 27 +++- 22 files changed, 657 insertions(+), 15 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 1d22d25e186..9bd8356762c 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -1,6 +1,8 @@ #pragma once #include +#include + namespace DB { @@ -98,5 +100,87 @@ public: } return res; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << indent_str << database; + settings.ostr << "."; + } + settings.ostr << indent_str << table; + } + settings.ostr << nl_or_ws; + + for (size_t i = 0; i < parameters.size(); ++i) + { + const ASTAlterQuery::Parameters & p = parameters[i]; + + if (p.type == ASTAlterQuery::ADD_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); + p.col_decl->formatImpl(settings, state, frame); + + /// AFTER + if (p.column) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + p.column->formatImpl(settings, state, frame); + } + } + else if (p.type == ASTAlterQuery::DROP_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP COLUMN " << (settings.hilite ? hilite_none : ""); + p.column->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::MODIFY_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? hilite_none : ""); + p.col_decl->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::DROP_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (p.detach ? "DETACH" : "DROP") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::ATTACH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " << (p.unreplicated ? "UNREPLICATED " : "") + << (p.part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::FETCH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " << (p.unreplicated ? "UNREPLICATED " : "") + << "PARTITION " << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? hilite_none : "") << mysqlxx::quote << p.from; + } + else if (p.type == ASTAlterQuery::FREEZE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else + throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + + std::string comma = (i < (parameters.size() -1) ) ? "," : ""; + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << comma << (settings.hilite ? hilite_none : ""); + + settings.ostr << settings.nl_or_ws; + } + } }; + } diff --git a/dbms/include/DB/Parsers/ASTAsterisk.h b/dbms/include/DB/Parsers/ASTAsterisk.h index aa90d676c71..f0741539267 100644 --- a/dbms/include/DB/Parsers/ASTAsterisk.h +++ b/dbms/include/DB/Parsers/ASTAsterisk.h @@ -16,6 +16,12 @@ public: String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return new ASTAsterisk(*this); } String getColumnName() const override { return "*"; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << "*"; + } }; } diff --git a/dbms/include/DB/Parsers/ASTCheckQuery.h b/dbms/include/DB/Parsers/ASTCheckQuery.h index 901ad7ef567..74b64aeaef8 100644 --- a/dbms/include/DB/Parsers/ASTCheckQuery.h +++ b/dbms/include/DB/Parsers/ASTCheckQuery.h @@ -19,6 +19,28 @@ struct ASTCheckQuery : public IAST std::string database; std::string table; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + std::string nl_or_nothing = settings.one_line ? "" : "\n"; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string nl_or_ws = settings.one_line ? " " : "\n"; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << database << (settings.hilite ? hilite_none : ""); + settings.ostr << "."; + } + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << table << (settings.hilite ? hilite_none : ""); + } + settings.ostr << nl_or_ws; + } }; } diff --git a/dbms/include/DB/Parsers/ASTColumnDeclaration.h b/dbms/include/DB/Parsers/ASTColumnDeclaration.h index 9862c5a81b6..722bc6d8283 100644 --- a/dbms/include/DB/Parsers/ASTColumnDeclaration.h +++ b/dbms/include/DB/Parsers/ASTColumnDeclaration.h @@ -40,6 +40,26 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name); + if (type) + { + settings.ostr << ' '; + type->formatImpl(settings, state, frame); + } + + if (default_expression) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; + default_expression->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index 33d69a8e8e3..57cfc1a35b2 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -48,6 +48,73 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + if (!database.empty() && table.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH DATABASE " : "CREATE DATABASE ") << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(database); + return; + } + + { + std::string what = "TABLE"; + if (is_view) + what = "VIEW"; + if (is_materialized_view) + what = "MATERIALIZED VIEW"; + + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (attach ? "ATTACH " : "CREATE ") + << (is_temporary ? "TEMPORARY " : "") + << what + << " " << (if_not_exists ? "IF NOT EXISTS " : "") + << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } + + if (!as_table.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") + << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); + } + + if (columns) + { + settings.ostr << (settings.one_line ? " (" : "\n("); + ++frame.indent; + columns->formatImpl(settings, state, frame); + settings.ostr << (settings.one_line ? ")" : "\n)"); + } + + if (storage && !is_materialized_view && !is_view) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ENGINE" << (settings.hilite ? hilite_none : "") << " = "; + storage->formatImpl(settings, state, frame); + } + + if (inner_storage) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ENGINE" << (settings.hilite ? hilite_none : "") << " = "; + inner_storage->formatImpl(settings, state, frame); + } + + if (is_populate) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); + } + + if (select) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); + select->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTDropQuery.h b/dbms/include/DB/Parsers/ASTDropQuery.h index c0ac24017d0..897f9afd4c6 100644 --- a/dbms/include/DB/Parsers/ASTDropQuery.h +++ b/dbms/include/DB/Parsers/ASTDropQuery.h @@ -24,6 +24,25 @@ public: String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; }; ASTPtr clone() const override { return new ASTDropQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (table.empty() && !database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") + << (detach ? "DETACH DATABASE " : "DROP DATABASE ") + << (if_exists ? "IF EXISTS " : "") + << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(database); + return; + } + + settings.ostr << (settings.hilite ? hilite_keyword : "") + << (detach ? "DETACH TABLE " : "DROP TABLE ") + << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } }; } diff --git a/dbms/include/DB/Parsers/ASTExpressionList.h b/dbms/include/DB/Parsers/ASTExpressionList.h index 1ec814a8d1b..1a2fdb19cb7 100644 --- a/dbms/include/DB/Parsers/ASTExpressionList.h +++ b/dbms/include/DB/Parsers/ASTExpressionList.h @@ -31,6 +31,40 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + { + if (it != children.begin()) + settings.ostr << ", "; + + (*it)->formatImpl(settings, state, frame); + } + } + + + friend class ASTSelectQuery; + + /** Вывести список выражений в секциях запроса SELECT - по одному выражению на строку. + */ + void formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const + { + std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + + ++frame.indent; + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + { + if (it != children.begin()) + settings.ostr << ", "; + + if (children.size() > 1) + settings.ostr << indent_str; + + (*it)->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 70380b67e7e..3801a32e638 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -82,6 +82,9 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 58ae38ca434..550973f298f 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -41,6 +42,28 @@ public: { set.insert(name); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + settings.ostr << (settings.hilite ? hilite_identifier : ""); + + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(name, wb); + wb.next(); + + settings.ostr << (settings.hilite ? hilite_none : ""); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTInsertQuery.h b/dbms/include/DB/Parsers/ASTInsertQuery.h index 5e6988bcfc6..c7a1879b55d 100644 --- a/dbms/include/DB/Parsers/ASTInsertQuery.h +++ b/dbms/include/DB/Parsers/ASTInsertQuery.h @@ -42,6 +42,43 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << "INSERT INTO " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + if (!insert_id.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ID = " << (settings.hilite ? hilite_none : "") + << mysqlxx::quote << insert_id; + + if (columns) + { + settings.ostr << " ("; + columns->formatImpl(settings, state, frame); + settings.ostr << ")"; + } + + if (select) + { + settings.ostr << " "; + select->formatImpl(settings, state, frame); + } + else + { + if (!format.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format; + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : ""); + } + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 96f161c5c60..029d4f49350 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -83,6 +83,37 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + if (locality == ASTJoin::Global) + settings.ostr << "GLOBAL "; + + if (kind != ASTJoin::Cross) + settings.ostr << (strictness == ASTJoin::Any ? "ANY " : "ALL "); + + settings.ostr << (kind == ASTJoin::Inner ? "INNER " + : (kind == ASTJoin::Left ? "LEFT " + : (kind == ASTJoin::Right ? "RIGHT " + : (kind == ASTJoin::Cross ? "CROSS " + : "FULL OUTER ")))); + + settings.ostr << "JOIN " + << (settings.hilite ? hilite_none : ""); + + table->formatImpl(settings, state, frame); + + if (kind != ASTJoin::Cross) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : ""); + using_expr_list->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index 2a610255be8..c5f1ec91ed4 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -26,6 +26,22 @@ public: String getID() const override { return "Literal_" + apply_visitor(FieldVisitorDump(), value); } ASTPtr clone() const override { return new ASTLiteral(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + settings.ostr <formatImpl(settings, state, frame); + } }; } diff --git a/dbms/include/DB/Parsers/ASTOptimizeQuery.h b/dbms/include/DB/Parsers/ASTOptimizeQuery.h index 906b3d1edb5..2fb6921d2ed 100644 --- a/dbms/include/DB/Parsers/ASTOptimizeQuery.h +++ b/dbms/include/DB/Parsers/ASTOptimizeQuery.h @@ -22,6 +22,13 @@ public: String getID() const override { return "OptimizeQuery_" + database + "_" + table; }; ASTPtr clone() const override { return new ASTOptimizeQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } }; } diff --git a/dbms/include/DB/Parsers/ASTOrderByElement.h b/dbms/include/DB/Parsers/ASTOrderByElement.h index f341265d93b..d7103a48e64 100644 --- a/dbms/include/DB/Parsers/ASTOrderByElement.h +++ b/dbms/include/DB/Parsers/ASTOrderByElement.h @@ -29,6 +29,18 @@ public: String getID() const override { return "OrderByElement"; } ASTPtr clone() const override { return new ASTOrderByElement(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + children.front()->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << (direction == -1 ? " DESC" : " ASC") << (settings.hilite ? hilite_none : ""); + if (!collator.isNull()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " COLLATE " << (settings.hilite ? hilite_none : "") + << "'" << collator->getLocale() << "'"; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 32ebb1e528d..015cb010ead 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -8,17 +8,17 @@ namespace DB { - /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. - */ - class ASTQueryWithTableAndOutput : public ASTQueryWithOutput - { - public: - String database; - String table; - - ASTQueryWithTableAndOutput() = default; - ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} - }; +/** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. + */ +class ASTQueryWithTableAndOutput : public ASTQueryWithOutput +{ +public: + String database; + String table; + + ASTQueryWithTableAndOutput() = default; + ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} +}; /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. diff --git a/dbms/include/DB/Parsers/ASTRenameQuery.h b/dbms/include/DB/Parsers/ASTRenameQuery.h index 4eb6624e4c3..ffb59c3f0f8 100644 --- a/dbms/include/DB/Parsers/ASTRenameQuery.h +++ b/dbms/include/DB/Parsers/ASTRenameQuery.h @@ -34,6 +34,22 @@ public: String getID() const override { return "Rename"; }; ASTPtr clone() const override { return new ASTRenameQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "RENAME TABLE " << (settings.hilite ? hilite_none : ""); + + for (ASTRenameQuery::Elements::const_iterator it = elements.begin(); it != elements.end(); ++it) + { + if (it != elements.begin()) + settings.ostr << ", "; + + settings.ostr << (!it->from.database.empty() ? backQuoteIfNeed(it->from.database) + "." : "") << backQuoteIfNeed(it->from.table) + << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") + << (!it->to.database.empty() ? backQuoteIfNeed(it->to.database) + "." : "") << backQuoteIfNeed(it->to.table); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index b941046c534..6839f6616c9 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -68,6 +68,9 @@ public: ASTPtr prev_union_all; /// Следующий запрос SELECT в цепочке UNION ALL, если такой есть ASTPtr next_union_all; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; } diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 803cc9eb31f..cb08ef5b408 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -133,6 +133,65 @@ public: (*it)->collectIdentifierNames(set); } + + /// Преобразовать в строку. + + /// Настройки формата. + struct FormatSettings + { + std::ostream & ostr; + bool hilite; + bool one_line; + + char nl_or_ws; + + FormatSettings(std::ostream & ostr_, bool hilite_, bool one_line_) + : ostr(ostr_), hilite(hilite_), one_line(one_line_) + { + nl_or_ws = one_line ? ' ' : '\n'; + } + }; + + /// Состояние. Например, множество узлов DAG, которых мы уже обошли. + struct FormatState + { + /// TODO + }; + + /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. + struct FormatStateStacked + { + bool indent = 0; + bool need_parens = false; + }; + + void format(const FormatSettings & settings) const + { + FormatState state; + formatImpl(settings, state, FormatStateStacked()); + } + +protected: + /// Для подсветки синтаксиса. + static const char * hilite_keyword; + static const char * hilite_identifier; + static const char * hilite_function; + static const char * hilite_operator; + static const char * hilite_alias; + static const char * hilite_none; + + + virtual void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const + { + throw Exception("Unknown element in AST: " + getID() + + ((range.first && (range.second > range.first)) + ? " '" + std::string(range.first, range.second - range.first) + "'" + : ""), + ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + } + + void writeAlias(const String & name, std::ostream & s, bool hilite); + private: size_t checkDepthImpl(size_t max_depth, size_t level) const { @@ -152,4 +211,9 @@ private: typedef SharedPtr ASTPtr; typedef std::vector ASTs; + +/// Квотировать идентификатор обратными кавычками, если это требуется. +String backQuoteIfNeed(const String & x); + + } diff --git a/dbms/include/DB/Parsers/formatAST.h b/dbms/include/DB/Parsers/formatAST.h index 811d946f044..3fdfded2aa3 100644 --- a/dbms/include/DB/Parsers/formatAST.h +++ b/dbms/include/DB/Parsers/formatAST.h @@ -12,7 +12,7 @@ namespace DB /** Берёт синтаксическое дерево и превращает его обратно в текст. * В случае запроса INSERT, данные будут отсутствовать. */ -void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); +void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); String formatColumnsForCreateQuery(NamesAndTypesList & columns); diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index ff6f53d5a4e..5da7767dda5 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -1,5 +1,7 @@ +#include #include + namespace DB { @@ -217,5 +219,151 @@ const IAST * ASTSelectQuery::getFormat() const return query->format.get(); } + +void ASTSelectQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override +{ + frame.need_parens = false; + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (settings.hilite ? hilite_none : ""); + + settings.one_line + ? select_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*select_expression_list).formatImplMultiline(settings, state, frame); + + if (table) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FROM " << (settings.hilite ? hilite_none : ""); + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << "."; + } + + if (typeid_cast(&*table)) + { + if (settings.one_line) + settings.ostr << " ("; + else + settings.ostr << "\n" << indent_str << "(\n"; + + table->formatImpl(settings, state, frame); + + if (settings.one_line) + settings.ostr << ")"; + else + settings.ostr << "\n" << indent_str << ")"; + } + else + table->formatImpl(settings, state, frame); + } + + if (final) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FINAL" << (settings.hilite ? hilite_none : ""); + } + + if (sample_size) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SAMPLE " << (settings.hilite ? hilite_none : ""); + sample_size->formatImpl(settings, state, frame); + } + + if (array_join_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str + << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (settings.hilite ? hilite_none : ""); + + settings.one_line + ? array_join_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*array_join_expression_list).formatImplMultiline(settings, state, frame); + } + + if (join) + { + settings.ostr << " "; + join->formatImpl(settings, state, frame); + } + + if (prewhere_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "PREWHERE " << (settings.hilite ? hilite_none : ""); + prewhere_expression->formatImpl(settings, state, frame); + } + + if (where_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "WHERE " << (settings.hilite ? hilite_none : ""); + where_expression, s, indent, hilite, settings.one_line); + } + + if (group_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "GROUP BY " << (settings.hilite ? hilite_none : ""); + settings.one_line + ? group_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*group_expression_list).formatImplMultiline(settings, state, frame); + } + + if (group_by_with_totals) + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << (settings.one_line ? "" : " ") << "WITH TOTALS" << (settings.hilite ? hilite_none : ""); + + if (having_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "HAVING " << (settings.hilite ? hilite_none : ""); + having_expression->formatImpl(settings, state, frame); + } + + if (order_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "ORDER BY " << (settings.hilite ? hilite_none : ""); + settings.one_line + ? order_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*order_expression_list).formatImplMultiline(settings, state, frame); + } + + if (limit_length) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "LIMIT " << (settings.hilite ? hilite_none : ""); + if (limit_offset) + { + limit_offset->formatImpl(settings, state, frame); + settings.ostr << ", "; + } + limit_length->formatImpl(settings, state, frame); + } + + if (settings) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SETTINGS " << (settings.hilite ? hilite_none : ""); + + const ASTSetQuery & ast_set = typeid_cast(*settings); + for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) + { + if (it != ast_set.changes.begin()) + settings.ostr << ", "; + + settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + } + } + + if (format) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + + if (next_union_all) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "UNION ALL " << settings.nl_or_ws << settings.ostr << (settings.hilite ? hilite_none : ""); + + // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL + // имеются только деревья типа SELECT. + const ASTSelectQuery & next_ast = static_cast(*next_union_all); + + next_ast->formatImpl(settings, state, frame); + } +} + }; diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 5d0854827ef..4e0d38ea6b8 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -63,7 +63,7 @@ String backQuoteIfNeed(const String & x) } -static String hightlight(const String & keyword, const String & color_sequence, const bool hilite) +static String highlight(const String & keyword, const String & color_sequence, const bool hilite) { return hilite ? color_sequence + keyword + hilite_none : keyword; } @@ -81,6 +81,21 @@ static void writeAlias(const String & name, std::ostream & s, bool hilite, bool } +struct FormatState +{ + std::ostream & s; + bool hilite; + bool one_line; + + void formatImpl(const IAST & ast, size_t indent, bool need_parens); + + +}; + + + + + void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) @@ -729,7 +744,7 @@ void formatAST(const ASTColumnDeclaration & ast, std::ostream & s, size_t indent if (ast.default_expression) { - s << ' ' << hightlight(ast.default_specifier, hilite_keyword, hilite) << ' '; + s << ' ' << highlight(ast.default_specifier, hilite_keyword, hilite) << ' '; formatAST(*ast.default_expression, s, indent, hilite, one_line); } } @@ -908,10 +923,16 @@ void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent, bool void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { + FormatState state = { .s = s, .hilite = hilite, .one_line = one_line }; + state.formatImpl(ast, indent, need_parens); +} + +void FormatState::formatImpl(const IAST & ast, size_t indent, bool need_parens) +{ #define DISPATCH(NAME) \ else if (const AST ## NAME * concrete = typeid_cast(&ast)) \ - formatAST(*concrete, s, indent, hilite, one_line, need_parens); + state.formatImpl(*concrete, indent, need_parens); if (false) {} DISPATCH(SelectQuery) From 4eac02304382ab9b3ad7692b9a05cc2ecf58677e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 00:39:42 +0300 Subject: [PATCH 46/88] dbms: addition to prev. revision [#METR-17606]. --- dbms/src/Parsers/ASTFunction.cpp | 197 +++++++++++++++++++++++++++++++ dbms/src/Parsers/IAST.cpp | 39 ++++++ 2 files changed, 236 insertions(+) create mode 100644 dbms/src/Parsers/ASTFunction.cpp create mode 100644 dbms/src/Parsers/IAST.cpp diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp new file mode 100644 index 00000000000..fe85ba99bbb --- /dev/null +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -0,0 +1,197 @@ +#include +#include + + +namespace DB +{ + void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + FormatStateStacked nested_need_parens; + FormatStateStacked nested_dont_need_parens; + nested_need_parens.need_parens = true; + nested_dont_need_parens.need_parens = false; + + /// Стоит ли записать эту функцию в виде оператора? + bool written = false; + if (arguments && !parameters) + { + if (arguments->children.size() == 1) + { + const char * operators[] = + { + "negate", "-", + "not", "NOT ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + + /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: + * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. + * Вместо этого, добавим пробел. + * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. + */ + if (name == "negate" && typeid_cast(&*arguments->children[0])) + settings.ostr << ' '; + + FormatStateStacked nested_frame; + nested_frame.need_parens = true; + arguments->formatImpl(settings, state, nested_need_parens); + written = true; + } + } + } + + /** need_parens - нужны ли скобки вокруг выражения с оператором. + * Они нужны, только если это выражение входит в другое выражение с оператором. + */ + + if (!written && arguments->children.size() == 2) + { + const char * operators[] = + { + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "like", " LIKE ", + "notLike", " NOT LIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } + } + + if (!written && 0 == strcmp(name.c_str(), "arrayElement")) + { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && 0 == strcmp(name.c_str(), "tupleElement")) + { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + written = true; + } + } + + if (!written && arguments->children.size() >= 2) + { + const char * operators[] = + { + "and", " AND ", + "or", " OR ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[i]->formatImpl(settings, state, nested_need_parens); + } + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } + } + } + + if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + written = true; + } + } + + if (!written) + { + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (parameters) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + parameters->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + if (arguments) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + arguments->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); + } + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } +} diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp new file mode 100644 index 00000000000..c8036519eae --- /dev/null +++ b/dbms/src/Parsers/IAST.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +const char * IAST::hilite_keyword = "\033[1m"; +const char * IAST::hilite_identifier = "\033[0;36m"; +const char * IAST::hilite_function = "\033[0;33m"; +const char * IAST::hilite_operator = "\033[1;33m"; +const char * IAST::hilite_alias = "\033[0;32m"; +const char * IAST::hilite_none = "\033[0m"; + + +/// Квотировать идентификатор обратными кавычками, если это требуется. +String backQuoteIfNeed(const String & x) +{ + String res(x.size(), '\0'); + { + WriteBufferFromString wb(res); + writeProbablyBackQuotedString(x, wb); + } + return res; +} + + +void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) +{ + s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); + + WriteBufferFromOStream wb(s, 32); + writeProbablyBackQuotedString(name, wb); + wb.next(); + + s << (hilite ? hilite_none : ""); +} + +} From e072db7da23347730c00f3beabb461011e82da90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 06:26:27 +0300 Subject: [PATCH 47/88] dbms: preparation to more compact query formatting [#METR-17606]. --- dbms/include/DB/Parsers/ASTAlterQuery.h | 2 +- dbms/include/DB/Parsers/ASTNameTypePair.h | 2 +- dbms/include/DB/Parsers/ASTQueryWithOutput.h | 8 +- .../DB/Parsers/ASTQueryWithTableAndOutput.h | 32 +- dbms/include/DB/Parsers/ASTSet.h | 11 + dbms/include/DB/Parsers/ASTSetQuery.h | 16 +- .../DB/Parsers/ASTShowProcesslistQuery.h | 4 +- dbms/include/DB/Parsers/ASTShowTablesQuery.h | 35 +- dbms/include/DB/Parsers/ASTSubquery.h | 24 + dbms/include/DB/Parsers/ASTUseQuery.h | 9 +- dbms/include/DB/Parsers/IAST.h | 4 +- .../DB/Parsers/TablePropertiesQueriesASTs.h | 18 +- dbms/include/DB/Parsers/formatAST.h | 7 +- dbms/src/Parsers/ASTFunction.cpp | 304 +++--- dbms/src/Parsers/ASTSelectQuery.cpp | 106 +- dbms/src/Parsers/IAST.cpp | 2 +- dbms/src/Parsers/formatAST.cpp | 969 ------------------ 17 files changed, 347 insertions(+), 1206 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 9bd8356762c..d0e88080071 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -119,7 +119,7 @@ protected: } settings.ostr << indent_str << table; } - settings.ostr << nl_or_ws; + settings.ostr << settings.nl_or_ws; for (size_t i = 0; i < parameters.size(); ++i) { diff --git a/dbms/include/DB/Parsers/ASTNameTypePair.h b/dbms/include/DB/Parsers/ASTNameTypePair.h index e2c392a08ff..4768c7b67a2 100644 --- a/dbms/include/DB/Parsers/ASTNameTypePair.h +++ b/dbms/include/DB/Parsers/ASTNameTypePair.h @@ -40,7 +40,7 @@ protected: { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - s << settings.nl_or_ws << indent_str << backQuoteIfNeed(name) << " "; + settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name) << " "; type->formatImpl(settings, state, frame); } }; diff --git a/dbms/include/DB/Parsers/ASTQueryWithOutput.h b/dbms/include/DB/Parsers/ASTQueryWithOutput.h index 92cba621eab..3a0bb4eac0f 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithOutput.h @@ -24,7 +24,7 @@ public: /// Объявляет класс-наследник ASTQueryWithOutput с реализованными методами getID и clone. -#define DEFINE_AST_QUERY_WITH_OUTPUT(Name, ID) \ +#define DEFINE_AST_QUERY_WITH_OUTPUT(Name, ID, Query) \ class Name : public ASTQueryWithOutput \ { \ public: \ @@ -44,6 +44,12 @@ public: \ } \ return ptr; \ } \ +\ +protected: \ + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override \ + { \ + settings.ostr << (settings.hilite ? hilite_keyword : "") << Query << (settings.hilite ? hilite_none : ""); \ + } \ }; } diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 015cb010ead..d99025593d0 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -6,8 +6,8 @@ namespace DB { - - + + /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput @@ -18,14 +18,28 @@ public: ASTQueryWithTableAndOutput() = default; ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} + +protected: + void formatHelper(const FormatSettings & settings, FormatState & state, FormatStateStacked frame, const char * name) const + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + if (format) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + } }; - - + + /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. -#define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID) \ +#define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID, Query) \ class Name : public ASTQueryWithTableAndOutput \ { \ -public: \ + public: \ Name() = default; \ Name(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} \ String getID() const override { return ID"_" + database + "_" + table; }; \ @@ -42,5 +56,11 @@ public: \ } \ return ptr; \ } \ + \ + protected: \ + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override \ + { \ + formatHelper(settings, state, frame, Query); \ + } \ }; } diff --git a/dbms/include/DB/Parsers/ASTSet.h b/dbms/include/DB/Parsers/ASTSet.h index 18edddc999f..d3af0b5f30f 100644 --- a/dbms/include/DB/Parsers/ASTSet.h +++ b/dbms/include/DB/Parsers/ASTSet.h @@ -22,6 +22,17 @@ public: String getID() const override { return "Set_" + getColumnName(); } ASTPtr clone() const override { return new ASTSet(*this); } String getColumnName() const override { return column_name; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /** Подготовленное множество. В пользовательских запросах такого не бывает, но такое бывает после промежуточных преобразований запроса. + * Выведем его не по-настоящему (это не будет корректным запросом, но покажет, что здесь было множество). + */ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "(...)" + << (settings.hilite ? hilite_none : ""); + } }; } diff --git a/dbms/include/DB/Parsers/ASTSetQuery.h b/dbms/include/DB/Parsers/ASTSetQuery.h index 0334d1167bb..471c76d2855 100644 --- a/dbms/include/DB/Parsers/ASTSetQuery.h +++ b/dbms/include/DB/Parsers/ASTSetQuery.h @@ -26,11 +26,25 @@ public: ASTSetQuery() = default; ASTSetQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Set"; }; ASTPtr clone() const override { return new ASTSetQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SET " << (global ? "GLOBAL " : "") << (settings.hilite ? hilite_none : ""); + + for (ASTSetQuery::Changes::const_iterator it = changes.begin(); it != changes.end(); ++it) + { + if (it != changes.begin()) + settings.ostr << ", "; + + settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h b/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h index 8d06950319e..c1bd4f35eb3 100644 --- a/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h +++ b/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h @@ -5,5 +5,7 @@ namespace DB { - DEFINE_AST_QUERY_WITH_OUTPUT(ASTShowProcesslistQuery, "ShowProcesslistQuery") + +DEFINE_AST_QUERY_WITH_OUTPUT(ASTShowProcesslistQuery, "ShowProcesslistQuery", "SHOW PROCESSLIST") + } diff --git a/dbms/include/DB/Parsers/ASTShowTablesQuery.h b/dbms/include/DB/Parsers/ASTShowTablesQuery.h index 4b51b8f1aba..ccd40bb164e 100644 --- a/dbms/include/DB/Parsers/ASTShowTablesQuery.h +++ b/dbms/include/DB/Parsers/ASTShowTablesQuery.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -20,7 +21,7 @@ public: ASTShowTablesQuery() = default; ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "ShowTables"; }; @@ -30,15 +31,43 @@ public: ASTPtr ptr{res}; res->children.clear(); - + if (format) { res->format = format->clone(); res->children.push_back(res->format); } - + return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW TABLES" << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") + << mysqlxx::quote << like; + } + + if (format) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 8dac88b26d3..461129a0ae6 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -34,6 +34,30 @@ public: } String getColumnName() const override { return getTreeID(); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string nl_or_nothing = settings.one_line ? "" : "\n"; + + settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; + FormatStateStacked frame_dont_need_parens = frame; + frame_dont_need_parens.need_parens = false; + children[0]->formatImpl(settings, state, frame_dont_need_parens); + settings.ostr << nl_or_nothing << indent_str << ")"; + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTUseQuery.h b/dbms/include/DB/Parsers/ASTUseQuery.h index eafe3496293..a1e354b39b0 100644 --- a/dbms/include/DB/Parsers/ASTUseQuery.h +++ b/dbms/include/DB/Parsers/ASTUseQuery.h @@ -16,11 +16,18 @@ public: ASTUseQuery() = default; ASTUseQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "UseQuery_" + database; }; ASTPtr clone() const override { return new ASTUseQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + return; + } }; } diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index cb08ef5b408..4ee619d0c9f 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -171,7 +171,7 @@ public: formatImpl(settings, state, FormatStateStacked()); } -protected: + /// Для подсветки синтаксиса. static const char * hilite_keyword; static const char * hilite_identifier; @@ -190,7 +190,7 @@ protected: ErrorCodes::UNKNOWN_ELEMENT_IN_AST); } - void writeAlias(const String & name, std::ostream & s, bool hilite); + void writeAlias(const String & name, std::ostream & s, bool hilite) const; private: size_t checkDepthImpl(size_t max_depth, size_t level) const diff --git a/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h b/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h index eb06c893167..793ee655b7d 100644 --- a/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h @@ -5,17 +5,9 @@ namespace DB { - - /** EXISTS запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTExistsQuery, "ExistsQuery") - - /** SHOW CREATE TABLE запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTShowCreateQuery, "ShowCreateQuery") - - /** DESCRIBE TABLE запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTDescribeQuery, "DescribeQuery") - + +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTExistsQuery, "ExistsQuery", "EXISTS TABLE") +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTShowCreateQuery, "ShowCreateQuery", "SHOW CREATE TABLE") +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTDescribeQuery, "DescribeQuery", "DESCRIBE TABLE") + } diff --git a/dbms/include/DB/Parsers/formatAST.h b/dbms/include/DB/Parsers/formatAST.h index 3fdfded2aa3..d94602e9d66 100644 --- a/dbms/include/DB/Parsers/formatAST.h +++ b/dbms/include/DB/Parsers/formatAST.h @@ -12,11 +12,14 @@ namespace DB /** Берёт синтаксическое дерево и превращает его обратно в текст. * В случае запроса INSERT, данные будут отсутствовать. */ -void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); +inline void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false) +{ + IAST::FormatSettings settings(s, hilite, one_line); + ast.format(settings); +} String formatColumnsForCreateQuery(NamesAndTypesList & columns); -String backQuoteIfNeed(const String & x); inline std::ostream & operator<<(std::ostream & os, const IAST & ast) { return formatAST(ast, os, 0, false, true), os; } inline std::ostream & operator<<(std::ostream & os, const ASTPtr & ast) { return formatAST(*ast, os, 0, false, true), os; } diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index fe85ba99bbb..1791d837970 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -4,194 +4,196 @@ namespace DB { - void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + +void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + FormatStateStacked nested_need_parens = frame; + FormatStateStacked nested_dont_need_parens = frame; + nested_need_parens.need_parens = true; + nested_dont_need_parens.need_parens = false; + + /// Стоит ли записать эту функцию в виде оператора? + bool written = false; + if (arguments && !parameters) { - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - - FormatStateStacked nested_need_parens; - FormatStateStacked nested_dont_need_parens; - nested_need_parens.need_parens = true; - nested_dont_need_parens.need_parens = false; - - /// Стоит ли записать эту функцию в виде оператора? - bool written = false; - if (arguments && !parameters) + if (arguments->children.size() == 1) { - if (arguments->children.size() == 1) + const char * operators[] = { - const char * operators[] = - { - "negate", "-", - "not", "NOT ", - nullptr - }; + "negate", "-", + "not", "NOT ", + nullptr + }; - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - - /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: - * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. - * Вместо этого, добавим пробел. - * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. - */ - if (name == "negate" && typeid_cast(&*arguments->children[0])) - settings.ostr << ' '; - - FormatStateStacked nested_frame; - nested_frame.need_parens = true; - arguments->formatImpl(settings, state, nested_need_parens); - written = true; - } - } - } - - /** need_parens - нужны ли скобки вокруг выражения с оператором. - * Они нужны, только если это выражение входит в другое выражение с оператором. - */ - - if (!written && arguments->children.size() == 2) + for (const char ** func = operators; *func; func += 2) { - const char * operators[] = + if (0 == strcmp(name.c_str(), func[0])) { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "notLike", " NOT LIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", - nullptr - }; + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - if (frame.need_parens) - settings.ostr << '('; - arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); - if (frame.need_parens) - settings.ostr << ')'; - written = true; - } - } + /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: + * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. + * Вместо этого, добавим пробел. + * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. + */ + if (name == "negate" && typeid_cast(&*arguments->children[0])) + settings.ostr << ' '; - if (!written && 0 == strcmp(name.c_str(), "arrayElement")) - { - arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + FormatStateStacked nested_frame; + nested_frame.need_parens = true; + arguments->formatImpl(settings, state, nested_need_parens); written = true; } + } + } - if (!written && 0 == strcmp(name.c_str(), "tupleElement")) + /** need_parens - нужны ли скобки вокруг выражения с оператором. + * Они нужны, только если это выражение входит в другое выражение с оператором. + */ + + if (!written && arguments->children.size() == 2) + { + const char * operators[] = + { + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "like", " LIKE ", + "notLike", " NOT LIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) { + if (frame.need_parens) + settings.ostr << '('; arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); + if (frame.need_parens) + settings.ostr << ')'; written = true; } } - if (!written && arguments->children.size() >= 2) - { - const char * operators[] = - { - "and", " AND ", - "or", " OR ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - if (frame.need_parens) - settings.ostr << '('; - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - arguments->children[i]->formatImpl(settings, state, nested_need_parens); - } - if (frame.need_parens) - settings.ostr << ')'; - written = true; - } - } - } - - if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) + if (!written && 0 == strcmp(name.c_str(), "arrayElement")) { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << ", "; - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); - } + arguments->children[1]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; } - if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + if (!written && 0 == strcmp(name.c_str(), "tupleElement")) { - settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << ", "; - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); - } - settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); written = true; } } - if (!written) + if (!written && arguments->children.size() >= 2) { - settings.ostr << (settings.hilite ? hilite_function : "") << name; - - if (parameters) + const char * operators[] = { - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - parameters->formatImpl(settings, state, nested_dont_need_parens); - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; - } + "and", " AND ", + "or", " OR ", + nullptr + }; - if (arguments) + for (const char ** func = operators; *func; func += 2) { - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - arguments->formatImpl(settings, state, nested_dont_need_parens); - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[i]->formatImpl(settings, state, nested_need_parens); + } + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } } - - settings.ostr << (settings.hilite ? hilite_none : ""); } - if (!alias.empty()) + if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + written = true; } } + + if (!written) + { + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (parameters) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + parameters->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + if (arguments) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + arguments->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); + } + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } +} + } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 5da7767dda5..76064357b2f 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -220,148 +220,148 @@ const IAST * ASTSelectQuery::getFormat() const } -void ASTSelectQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override +void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (s.hilite ? hilite_none : ""); - settings.one_line - ? select_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*select_expression_list).formatImplMultiline(settings, state, frame); + s.one_line + ? select_expression_list->formatImpl(s, state, frame) + : typeid_cast(*select_expression_list).formatImplMultiline(s, state, frame); if (table) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FROM " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FROM " << (s.hilite ? hilite_none : ""); if (database) { - database->formatImpl(settings, state, frame); - settings.ostr << "."; + database->formatImpl(s, state, frame); + s.ostr << "."; } if (typeid_cast(&*table)) { - if (settings.one_line) - settings.ostr << " ("; + if (s.one_line) + s.ostr << " ("; else - settings.ostr << "\n" << indent_str << "(\n"; + s.ostr << "\n" << indent_str << "(\n"; - table->formatImpl(settings, state, frame); + table->formatImpl(s, state, frame); - if (settings.one_line) - settings.ostr << ")"; + if (s.one_line) + s.ostr << ")"; else - settings.ostr << "\n" << indent_str << ")"; + s.ostr << "\n" << indent_str << ")"; } else - table->formatImpl(settings, state, frame); + table->formatImpl(s, state, frame); } if (final) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FINAL" << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FINAL" << (s.hilite ? hilite_none : ""); } if (sample_size) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SAMPLE " << (settings.hilite ? hilite_none : ""); - sample_size->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SAMPLE " << (s.hilite ? hilite_none : ""); + sample_size->formatImpl(s, state, frame); } if (array_join_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str - << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str + << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (s.hilite ? hilite_none : ""); - settings.one_line - ? array_join_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*array_join_expression_list).formatImplMultiline(settings, state, frame); + s.one_line + ? array_join_expression_list->formatImpl(s, state, frame) + : typeid_cast(*array_join_expression_list).formatImplMultiline(s, state, frame); } if (join) { - settings.ostr << " "; - join->formatImpl(settings, state, frame); + s.ostr << " "; + join->formatImpl(s, state, frame); } if (prewhere_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "PREWHERE " << (settings.hilite ? hilite_none : ""); - prewhere_expression->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "PREWHERE " << (s.hilite ? hilite_none : ""); + prewhere_expression->formatImpl(s, state, frame); } if (where_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "WHERE " << (settings.hilite ? hilite_none : ""); - where_expression, s, indent, hilite, settings.one_line); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "WHERE " << (s.hilite ? hilite_none : ""); + where_expression->formatImpl(s, state, frame); } if (group_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "GROUP BY " << (settings.hilite ? hilite_none : ""); - settings.one_line - ? group_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*group_expression_list).formatImplMultiline(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY " << (s.hilite ? hilite_none : ""); + s.one_line + ? group_expression_list->formatImpl(s, state, frame) + : typeid_cast(*group_expression_list).formatImplMultiline(s, state, frame); } if (group_by_with_totals) - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << (settings.one_line ? "" : " ") << "WITH TOTALS" << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH TOTALS" << (s.hilite ? hilite_none : ""); if (having_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "HAVING " << (settings.hilite ? hilite_none : ""); - having_expression->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "HAVING " << (s.hilite ? hilite_none : ""); + having_expression->formatImpl(s, state, frame); } if (order_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "ORDER BY " << (settings.hilite ? hilite_none : ""); - settings.one_line - ? order_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*order_expression_list).formatImplMultiline(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); + s.one_line + ? order_expression_list->formatImpl(s, state, frame) + : typeid_cast(*order_expression_list).formatImplMultiline(s, state, frame); } if (limit_length) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "LIMIT " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); if (limit_offset) { - limit_offset->formatImpl(settings, state, frame); - settings.ostr << ", "; + limit_offset->formatImpl(s, state, frame); + s.ostr << ", "; } - limit_length->formatImpl(settings, state, frame); + limit_length->formatImpl(s, state, frame); } if (settings) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SETTINGS " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SETTINGS " << (s.hilite ? hilite_none : ""); const ASTSetQuery & ast_set = typeid_cast(*settings); for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) { if (it != ast_set.changes.begin()) - settings.ostr << ", "; + s.ostr << ", "; - settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + s.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); } } if (format) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); - format->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FORMAT " << (s.hilite ? hilite_none : ""); + format->formatImpl(s, state, frame); } if (next_union_all) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "UNION ALL " << settings.nl_or_ws << settings.ostr << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "UNION ALL " << s.nl_or_ws << (s.hilite ? hilite_none : ""); // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL // имеются только деревья типа SELECT. const ASTSelectQuery & next_ast = static_cast(*next_union_all); - next_ast->formatImpl(settings, state, frame); + next_ast.formatImpl(s, state, frame); } } diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index c8036519eae..2c6f0b8a3e1 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -25,7 +25,7 @@ String backQuoteIfNeed(const String & x) } -void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) +void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) const { s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 4e0d38ea6b8..7966b0db491 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -1,978 +1,9 @@ -#include - -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include - #include namespace DB { - -static const char * hilite_keyword = "\033[1m"; -static const char * hilite_identifier = "\033[0;36m"; -static const char * hilite_function = "\033[0;33m"; -static const char * hilite_operator = "\033[1;33m"; -static const char * hilite_alias = "\033[0;32m"; -static const char * hilite_none = "\033[0m"; - - -/// Квотировать идентификатор обратными кавычками, если это требуется. -String backQuoteIfNeed(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeProbablyBackQuotedString(x, wb); - } - return res; -} - - -static String highlight(const String & keyword, const String & color_sequence, const bool hilite) -{ - return hilite ? color_sequence + keyword + hilite_none : keyword; -} - - -static void writeAlias(const String & name, std::ostream & s, bool hilite, bool one_line) -{ - s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); - - WriteBufferFromOStream wb(s, 32); - writeProbablyBackQuotedString(name, wb); - wb.next(); - - s << (hilite ? hilite_none : ""); -} - - -struct FormatState -{ - std::ostream & s; - bool hilite; - bool one_line; - - void formatImpl(const IAST & ast, size_t indent, bool need_parens); - - -}; - - - - - -void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) - { - if (it != ast.children.begin()) - s << ", "; - - formatAST(**it, s, indent, hilite, one_line, need_parens); - } -} - -/** Вывести список выражений в секциях запроса SELECT - по одному выражению на строку. - */ -static void formatExpressionListMultiline(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite) -{ - std::string indent_str = "\n" + std::string(4 * (indent + 1), ' '); - - for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) - { - if (it != ast.children.begin()) - s << ", "; - - if (ast.children.size() > 1) - s << indent_str; - - formatAST(**it, s, indent + 1, hilite, false); - } -} - - -void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "SELECT " << (ast.distinct ? "DISTINCT " : "") << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.select_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.select_expression_list), s, indent, hilite); - - if (ast.table) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FROM " << (hilite ? hilite_none : ""); - if (ast.database) - { - formatAST(*ast.database, s, indent, hilite, one_line); - s << "."; - } - - if (typeid_cast(&*ast.table)) - { - if (one_line) - s << " ("; - else - s << "\n" << indent_str << "(\n"; - - formatAST(*ast.table, s, indent + 1, hilite, one_line); - - if (one_line) - s << ")"; - else - s << "\n" << indent_str << ")"; - } - else - formatAST(*ast.table, s, indent, hilite, one_line); - } - - if (ast.final) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FINAL" << (hilite ? hilite_none : ""); - } - - if (ast.sample_size) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "SAMPLE " << (hilite ? hilite_none : ""); - formatAST(*ast.sample_size, s, indent, hilite, one_line); - } - - if (ast.array_join_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str - << (ast.array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (hilite ? hilite_none : ""); - - one_line - ? formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.array_join_expression_list), s, indent, hilite); - } - - if (ast.join) - { - s << " "; - formatAST(*ast.join, s, indent, hilite, one_line); - } - - if (ast.prewhere_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "PREWHERE " << (hilite ? hilite_none : ""); - formatAST(*ast.prewhere_expression, s, indent, hilite, one_line); - } - - if (ast.where_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "WHERE " << (hilite ? hilite_none : ""); - formatAST(*ast.where_expression, s, indent, hilite, one_line); - } - - if (ast.group_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "GROUP BY " << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.group_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.group_expression_list), s, indent, hilite); - } - - if (ast.group_by_with_totals) - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << (one_line ? "" : " ") << "WITH TOTALS" << (hilite ? hilite_none : ""); - - if (ast.having_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "HAVING " << (hilite ? hilite_none : ""); - formatAST(*ast.having_expression, s, indent, hilite, one_line); - } - - if (ast.order_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ORDER BY " << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.order_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.order_expression_list), s, indent, hilite); - } - - if (ast.limit_length) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "LIMIT " << (hilite ? hilite_none : ""); - if (ast.limit_offset) - { - formatAST(*ast.limit_offset, s, indent, hilite, one_line); - s << ", "; - } - formatAST(*ast.limit_length, s, indent, hilite, one_line); - } - - if (ast.settings) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "SETTINGS " << (hilite ? hilite_none : ""); - - const ASTSetQuery & ast_set = typeid_cast(*ast.settings); - for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) - { - if (it != ast_set.changes.begin()) - s << ", "; - - s << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); - } - } - - if (ast.format) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } - - if (ast.next_union_all) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "UNION ALL " << nl_or_ws << (hilite ? hilite_none : ""); - - // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL - // имеются только деревья типа SELECT. - const ASTSelectQuery & next_ast = static_cast(*ast.next_union_all); - - formatAST(next_ast, s, indent, hilite, one_line, need_parens); - } -} - -void formatAST(const ASTSubquery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (need_parens && !ast.alias.empty()) - s << '('; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_nothing = one_line ? "" : "\n"; - - s << nl_or_nothing << indent_str << "(" << nl_or_nothing; - formatAST(*ast.children[0], s, indent + 1, hilite, one_line); - s << nl_or_nothing << indent_str << ")"; - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_ws = one_line ? " " : "\n"; - - if (!ast.database.empty() && ast.table.empty()) - { - s << (hilite ? hilite_keyword : "") << (ast.attach ? "ATTACH DATABASE " : "CREATE DATABASE ") << (ast.if_not_exists ? "IF NOT EXISTS " : "") << (hilite ? hilite_none : "") - << backQuoteIfNeed(ast.database); - return; - } - - { - std::string what = "TABLE"; - if (ast.is_view) - what = "VIEW"; - if (ast.is_materialized_view) - what = "MATERIALIZED VIEW"; - - s << (hilite ? hilite_keyword : "") << (ast.attach ? "ATTACH " : "CREATE ") << (ast.is_temporary ? "TEMPORARY " : "") << what << " " << (ast.if_not_exists ? "IF NOT EXISTS " : "") << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - } - - if (!ast.as_table.empty()) - { - s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_none : "") - << (!ast.as_database.empty() ? backQuoteIfNeed(ast.as_database) + "." : "") << backQuoteIfNeed(ast.as_table); - } - - if (ast.columns) - { - s << (one_line ? " (" : "\n("); - formatAST(*ast.columns, s, indent + 1, hilite, one_line); - s << (one_line ? ")" : "\n)"); - } - - if (ast.storage && !ast.is_materialized_view && !ast.is_view) - { - s << (hilite ? hilite_keyword : "") << " ENGINE" << (hilite ? hilite_none : "") << " = "; - formatAST(*ast.storage, s, indent, hilite, one_line); - } - - if (ast.inner_storage) - { - s << (hilite ? hilite_keyword : "") << " ENGINE" << (hilite ? hilite_none : "") << " = "; - formatAST(*ast.inner_storage, s, indent, hilite, one_line); - } - - if (ast.is_populate) - { - s << (hilite ? hilite_keyword : "") << " POPULATE" << (hilite ? hilite_none : ""); - } - - if (ast.select) - { - s << (hilite ? hilite_keyword : "") << " AS" << nl_or_ws << (hilite ? hilite_none : ""); - formatAST(*ast.select, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTDropQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (ast.table.empty() && !ast.database.empty()) - { - s << (hilite ? hilite_keyword : "") << (ast.detach ? "DETACH DATABASE " : "DROP DATABASE ") << (ast.if_exists ? "IF EXISTS " : "") << (hilite ? hilite_none : "") << backQuoteIfNeed(ast.database); - return; - } - - s << (hilite ? hilite_keyword : "") << (ast.detach ? "DETACH TABLE " : "DROP TABLE ") << (ast.if_exists ? "IF EXISTS " : "") << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); -} - -void formatAST(const ASTOptimizeQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); -} - -void formatAST(const ASTQueryWithTableAndOutput & ast, std::string name, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << name << " " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - - if (ast.format) - { - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTExistsQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "EXISTS TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTDescribeQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "DESCRIBE TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTShowCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "SHOW CREATE TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTRenameQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "RENAME TABLE " << (hilite ? hilite_none : ""); - - for (ASTRenameQuery::Elements::const_iterator it = ast.elements.begin(); it != ast.elements.end(); ++it) - { - if (it != ast.elements.begin()) - s << ", "; - - s << (!it->from.database.empty() ? backQuoteIfNeed(it->from.database) + "." : "") << backQuoteIfNeed(it->from.table) - << (hilite ? hilite_keyword : "") << " TO " << (hilite ? hilite_none : "") - << (!it->to.database.empty() ? backQuoteIfNeed(it->to.database) + "." : "") << backQuoteIfNeed(it->to.table); - } -} - -void formatAST(const ASTSetQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "SET " << (ast.global ? "GLOBAL " : "") << (hilite ? hilite_none : ""); - - for (ASTSetQuery::Changes::const_iterator it = ast.changes.begin(); it != ast.changes.end(); ++it) - { - if (it != ast.changes.begin()) - s << ", "; - - s << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); - } -} - -void formatAST(const ASTShowTablesQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (ast.databases) - { - s << (hilite ? hilite_keyword : "") << "SHOW DATABASES" << (hilite ? hilite_none : ""); - } - else - { - s << (hilite ? hilite_keyword : "") << "SHOW TABLES" << (hilite ? hilite_none : ""); - - if (!ast.from.empty()) - s << (hilite ? hilite_keyword : "") << " FROM " << (hilite ? hilite_none : "") - << backQuoteIfNeed(ast.from); - - if (!ast.like.empty()) - s << (hilite ? hilite_keyword : "") << " LIKE " << (hilite ? hilite_none : "") - << mysqlxx::quote << ast.like; - } - - if (ast.format) - { - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTUseQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "USE " << (hilite ? hilite_none : "") << backQuoteIfNeed(ast.database); - return; -} - -void formatAST(const ASTShowProcesslistQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "SHOW PROCESSLIST" << (hilite ? hilite_none : ""); - return; -} - -void formatAST(const ASTInsertQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "INSERT INTO " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - - if (!ast.insert_id.empty()) - s << (hilite ? hilite_keyword : "") << " ID = " << (hilite ? hilite_none : "") - << mysqlxx::quote << ast.insert_id; - - if (ast.columns) - { - s << " ("; - formatAST(*ast.columns, s, indent, hilite, one_line); - s << ")"; - } - - if (ast.select) - { - s << " "; - formatAST(*ast.select, s, indent, hilite, one_line); - } - else - { - if (!ast.format.empty()) - { - s << (hilite ? hilite_keyword : "") << " FORMAT " << (hilite ? hilite_none : "") << ast.format; - } - else - { - s << (hilite ? hilite_keyword : "") << " VALUES" << (hilite ? hilite_none : ""); - } - } -} - -void formatAST(const ASTFunction & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (need_parens && !ast.alias.empty()) - s << '('; - - /// Стоит ли записать эту функцию в виде оператора? - bool written = false; - if (ast.arguments && !ast.parameters) - { - if (ast.arguments->children.size() == 1) - { - const char * operators[] = - { - "negate", "-", - "not", "NOT ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - - /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: - * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. - * Вместо этого, добавим пробел. - * PS. Нельзя просто попросить добавить скобки - см. formatAST для ASTLiteral. - */ - if (ast.name == "negate" && typeid_cast(&*ast.arguments->children[0])) - s << ' '; - - formatAST(*ast.arguments, s, indent, hilite, one_line, true); - written = true; - } - } - } - - /** need_parens - нужны ли скобки вокруг выражения с оператором. - * Они нужны, только если это выражение входит в другое выражение с оператором. - */ - - if (!written && ast.arguments->children.size() == 2) - { - const char * operators[] = - { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "notLike", " NOT LIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - if (need_parens) - s << '('; - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - if (need_parens) - s << ')'; - written = true; - } - } - - if (!written && 0 == strcmp(ast.name.c_str(), "arrayElement")) - { - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << '[' << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << ']' << (hilite ? hilite_none : ""); - written = true; - } - - if (!written && 0 == strcmp(ast.name.c_str(), "tupleElement")) - { - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << "." << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - written = true; - } - } - - if (!written && ast.arguments->children.size() >= 2) - { - const char * operators[] = - { - "and", " AND ", - "or", " OR ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - if (need_parens) - s << '('; - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, true); - } - if (need_parens) - s << ')'; - written = true; - } - } - } - - if (!written && ast.arguments->children.size() >= 1 && 0 == strcmp(ast.name.c_str(), "array")) - { - s << (hilite ? hilite_operator : "") << '[' << (hilite ? hilite_none : ""); - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << ", "; - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, false); - } - s << (hilite ? hilite_operator : "") << ']' << (hilite ? hilite_none : ""); - written = true; - } - - if (!written && ast.arguments->children.size() >= 2 && 0 == strcmp(ast.name.c_str(), "tuple")) - { - s << (hilite ? hilite_operator : "") << '(' << (hilite ? hilite_none : ""); - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << ", "; - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, false); - } - s << (hilite ? hilite_operator : "") << ')' << (hilite ? hilite_none : ""); - written = true; - } - } - - if (!written) - { - s << (hilite ? hilite_function : "") << ast.name; - - if (ast.parameters) - { - s << '(' << (hilite ? hilite_none : ""); - formatAST(*ast.parameters, s, indent, hilite, one_line); - s << (hilite ? hilite_function : "") << ')'; - } - - if (ast.arguments) - { - s << '(' << (hilite ? hilite_none : ""); - formatAST(*ast.arguments, s, indent, hilite, one_line); - s << (hilite ? hilite_function : "") << ')'; - } - - s << (hilite ? hilite_none : ""); - } - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTIdentifier & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (need_parens && !ast.alias.empty()) - s << '('; - - s << (hilite ? hilite_identifier : ""); - - WriteBufferFromOStream wb(s, 32); - writeProbablyBackQuotedString(ast.name, wb); - wb.next(); - - s << (hilite ? hilite_none : ""); - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTLiteral & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (need_parens && !ast.alias.empty()) - s << '('; - - s << apply_visitor(FieldVisitorToString(), ast.value); - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTNameTypePair & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << nl_or_ws << indent_str << backQuoteIfNeed(ast.name) << " "; - formatAST(*ast.type, s, indent, hilite, one_line); -} - -void formatAST(const ASTColumnDeclaration & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << nl_or_ws << indent_str << backQuoteIfNeed(ast.name); - if (ast.type) - { - s << ' '; - formatAST(*ast.type, s, indent, hilite, one_line); - } - - if (ast.default_expression) - { - s << ' ' << highlight(ast.default_specifier, hilite_keyword, hilite) << ' '; - formatAST(*ast.default_expression, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTAsterisk & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << "*"; -} - -void formatAST(const ASTOrderByElement & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(*ast.children.front(), s, indent, hilite, one_line); - s << (hilite ? hilite_keyword : "") << (ast.direction == -1 ? " DESC" : " ASC") << (hilite ? hilite_none : ""); - if (!ast.collator.isNull()) - { - s << (hilite ? hilite_keyword : "") << " COLLATE " << (hilite ? hilite_none : "") - << "'" << ast.collator->getLocale() << "'"; - } -} - -void formatAST(const ASTAlterQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (hilite ? hilite_none : ""); - - if (!ast.table.empty()) - { - if (!ast.database.empty()) - { - s << indent_str << ast.database; - s << "."; - } - s << indent_str << ast.table; - } - s << nl_or_ws; - - for (size_t i = 0; i < ast.parameters.size(); ++i) - { - const ASTAlterQuery::Parameters &p = ast.parameters[i]; - - if (p.type == ASTAlterQuery::ADD_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.col_decl, s, indent, hilite, true); - - /// AFTER - if (p.column) - { - s << (hilite ? hilite_keyword : "") << indent_str << " AFTER " << (hilite ? hilite_none : ""); - formatAST(*p.column, s, indent, hilite, one_line); - } - } - else if (p.type == ASTAlterQuery::DROP_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "DROP COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.column, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::MODIFY_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.col_decl, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::DROP_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << (p.detach ? "DETACH" : "DROP") << " PARTITION " - << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::ATTACH_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "ATTACH " << (p.unreplicated ? "UNREPLICATED " : "") - << (p.part ? "PART " : "PARTITION ") << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::FETCH_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "FETCH " << (p.unreplicated ? "UNREPLICATED " : "") - << "PARTITION " << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - s << (hilite ? hilite_keyword : "") << " FROM " << (hilite ? hilite_none : "") - << mysqlxx::quote << p.from; - } - else if (p.type == ASTAlterQuery::FREEZE_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - - std::string comma = (i < (ast.parameters.size() -1) ) ? "," : ""; - s << (hilite ? hilite_keyword : "") << indent_str << comma << (hilite ? hilite_none : ""); - - s << nl_or_ws; - } -} - -void formatAST(const ASTSet & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /** Подготовленное множество. В пользовательских запросах такого не бывает, но такое бывает после промежуточных преобразований запроса. - * Выведем его не по-настоящему (это не будет корректным запросом, но покажет, что здесь было множество). - */ - s << (hilite ? hilite_keyword : "") - << "(...)" - << (hilite ? hilite_none : ""); -} - -void formatAST(const ASTJoin & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : ""); - - if (ast.locality == ASTJoin::Global) - s << "GLOBAL "; - - if (ast.kind != ASTJoin::Cross) - s << (ast.strictness == ASTJoin::Any ? "ANY " : "ALL "); - - s << (ast.kind == ASTJoin::Inner ? "INNER " - : (ast.kind == ASTJoin::Left ? "LEFT " - : (ast.kind == ASTJoin::Right ? "RIGHT " - : (ast.kind == ASTJoin::Cross ? "CROSS " - : "FULL OUTER ")))); - - s << "JOIN " - << (hilite ? hilite_none : ""); - - formatAST(*ast.table, s, indent, hilite, one_line, need_parens); - - if (ast.kind != ASTJoin::Cross) - { - s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : ""); - formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens); - } -} - -void formatAST(const ASTCheckQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (hilite ? hilite_none : ""); - - if (!ast.table.empty()) - { - if (!ast.database.empty()) - { - s << (hilite ? hilite_keyword : "") << indent_str << ast.database << (hilite ? hilite_none : ""); - s << "."; - } - s << (hilite ? hilite_keyword : "") << indent_str << ast.table << (hilite ? hilite_none : ""); - } - s << nl_or_ws; -} - -/* -void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "{" << (hilite ? hilite_none : ""); - - for (const auto & child : ast.children) - { - s << "\n"; - formatAST(*child, s, indent + 1, hilite, one_line, need_parens); - s << ";\n"; - } - - s << (hilite ? hilite_keyword : "") << "}" << (hilite ? hilite_none : ""); -}*/ - - -void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - FormatState state = { .s = s, .hilite = hilite, .one_line = one_line }; - state.formatImpl(ast, indent, need_parens); -} - - -void FormatState::formatImpl(const IAST & ast, size_t indent, bool need_parens) -{ -#define DISPATCH(NAME) \ - else if (const AST ## NAME * concrete = typeid_cast(&ast)) \ - state.formatImpl(*concrete, indent, need_parens); - - if (false) {} - DISPATCH(SelectQuery) - DISPATCH(InsertQuery) - DISPATCH(CreateQuery) - DISPATCH(DropQuery) - DISPATCH(RenameQuery) - DISPATCH(ShowTablesQuery) - DISPATCH(UseQuery) - DISPATCH(SetQuery) - DISPATCH(OptimizeQuery) - DISPATCH(ExistsQuery) - DISPATCH(ShowCreateQuery) - DISPATCH(DescribeQuery) - DISPATCH(ExpressionList) - DISPATCH(Function) - DISPATCH(Identifier) - DISPATCH(Literal) - DISPATCH(NameTypePair) - DISPATCH(ColumnDeclaration) - DISPATCH(Asterisk) - DISPATCH(OrderByElement) - DISPATCH(Subquery) - DISPATCH(AlterQuery) - DISPATCH(ShowProcesslistQuery) - DISPATCH(Set) - DISPATCH(Join) - DISPATCH(CheckQuery) -// DISPATCH(MultiQuery) - else - throw Exception("Unknown element in AST: " + ast.getID() - + ((ast.range.first && (ast.range.second > ast.range.first)) - ? " '" + std::string(ast.range.first, ast.range.second - ast.range.first) + "'" - : ""), - ErrorCodes::UNKNOWN_ELEMENT_IN_AST); - -#undef DISPATCH -} - - String formatColumnsForCreateQuery(NamesAndTypesList & columns) { std::string res; From 5ed70eeb275e137053269f8d7c0712e376cd553f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 07:28:59 +0300 Subject: [PATCH 48/88] dbms: preparation [#METR-17606]. --- dbms/include/DB/Parsers/ASTFunction.h | 2 +- dbms/include/DB/Parsers/ASTIdentifier.h | 12 +--------- dbms/include/DB/Parsers/ASTLiteral.h | 14 ++---------- dbms/include/DB/Parsers/ASTSubquery.h | 13 +---------- dbms/include/DB/Parsers/ASTWithAlias.h | 30 +++++++++++++++++++++++++ dbms/include/DB/Parsers/IAST.h | 6 ++--- dbms/src/Parsers/ASTFunction.cpp | 13 +---------- 7 files changed, 39 insertions(+), 51 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 3801a32e638..c9857b20c83 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -84,7 +84,7 @@ public: } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 550973f298f..adeb068f61f 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -44,11 +44,8 @@ public: } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - settings.ostr << (settings.hilite ? hilite_identifier : ""); WriteBufferFromOStream wb(settings.ostr, 32); @@ -56,13 +53,6 @@ protected: wb.next(); settings.ostr << (settings.hilite ? hilite_none : ""); - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } }; diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index c5f1ec91ed4..6ee40f78afd 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -28,19 +28,9 @@ public: ASTPtr clone() const override { return new ASTLiteral(*this); } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; - - settings.ostr <formatImpl(settings, state, frame_dont_need_parens); settings.ostr << nl_or_nothing << indent_str << ")"; - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } }; diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 63eaa186cd3..29478d6a381 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -1,5 +1,6 @@ #pragma once +#include #include @@ -19,6 +20,35 @@ public: String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; } String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } + + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final + { + if (!alias.empty()) + { + /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. + if (!state.printed_asts_with_alias.insert(this).second) + { + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(alias, wb); + return; + } + } + + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + formatImplWithAlias(settings, state, frame); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr <<')'; + } + } + + virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; /// helper for setting aliases and chaining result to other functions diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 4ee619d0c9f..fb1b2aee2af 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -14,8 +16,6 @@ #include #include -#include - namespace DB { @@ -155,7 +155,7 @@ public: /// Состояние. Например, множество узлов DAG, которых мы уже обошли. struct FormatState { - /// TODO + std::unordered_set printed_asts_with_alias; }; /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 1791d837970..3c2591d3376 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -5,12 +5,8 @@ namespace DB { -void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTFunction::formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; nested_need_parens.need_parens = true; @@ -187,13 +183,6 @@ void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & stat settings.ostr << (settings.hilite ? hilite_none : ""); } - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } } From 32da2a7d8c1edf94ab4b41bba8af3a717e2fbf5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 00:32:51 +0300 Subject: [PATCH 49/88] dbms: improved query formatting for distributed queries [#METR-17606]. --- dbms/include/DB/Parsers/ASTWithAlias.h | 28 ++-------------- dbms/include/DB/Parsers/IAST.h | 29 ++++++++-------- dbms/src/Parsers/ASTSelectQuery.cpp | 1 + dbms/src/Parsers/ASTWithAlias.cpp | 33 +++++++++++++++++++ .../00211_query_formatting_aliases.reference | 1 + .../00211_query_formatting_aliases.sql | 6 ++++ 6 files changed, 59 insertions(+), 39 deletions(-) create mode 100644 dbms/src/Parsers/ASTWithAlias.cpp create mode 100644 dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference create mode 100644 dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 29478d6a381..0ade6e26ce9 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -21,32 +21,8 @@ public: String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final - { - if (!alias.empty()) - { - /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. - if (!state.printed_asts_with_alias.insert(this).second) - { - WriteBufferFromOStream wb(settings.ostr, 32); - writeProbablyBackQuotedString(alias, wb); - return; - } - } - - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; - - formatImplWithAlias(settings, state, frame); - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr <<')'; - } - } + /// Вызывает formatImplWithAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final; virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index fb1b2aee2af..2f3bc41db60 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -152,10 +152,13 @@ public: } }; - /// Состояние. Например, множество узлов DAG, которых мы уже обошли. + /// Состояние. Например, может запоминаться множество узлов, которых мы уже обошли. struct FormatState { - std::unordered_set printed_asts_with_alias; + /** Запрос SELECT, в котором найден алиас; идентификатор узла с таким алиасом. + * Нужно, чтобы когда узел встретился повторно, выводить только алиас. + */ + std::set> printed_asts_with_alias; }; /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. @@ -163,6 +166,7 @@ public: { bool indent = 0; bool need_parens = false; + const IAST * current_select = nullptr; }; void format(const FormatSettings & settings) const @@ -171,16 +175,6 @@ public: formatImpl(settings, state, FormatStateStacked()); } - - /// Для подсветки синтаксиса. - static const char * hilite_keyword; - static const char * hilite_identifier; - static const char * hilite_function; - static const char * hilite_operator; - static const char * hilite_alias; - static const char * hilite_none; - - virtual void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { throw Exception("Unknown element in AST: " + getID() @@ -192,6 +186,15 @@ public: void writeAlias(const String & name, std::ostream & s, bool hilite) const; +protected: + /// Для подсветки синтаксиса. + static const char * hilite_keyword; + static const char * hilite_identifier; + static const char * hilite_function; + static const char * hilite_operator; + static const char * hilite_alias; + static const char * hilite_none; + private: size_t checkDepthImpl(size_t max_depth, size_t level) const { diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 76064357b2f..35c4214e75c 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -222,6 +222,7 @@ const IAST * ASTSelectQuery::getFormat() const void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { + frame.current_select = this; frame.need_parens = false; std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp new file mode 100644 index 00000000000..e1319fcafea --- /dev/null +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -0,0 +1,33 @@ +#include + +namespace DB +{ + +void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (!alias.empty()) + { + /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. + if (!state.printed_asts_with_alias.emplace(frame.current_select, getID()).second) + { + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(alias, wb); + return; + } + } + + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + formatImplWithAlias(settings, state, frame); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr <<')'; + } +} + +} diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference new file mode 100644 index 00000000000..dd143e07d02 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference @@ -0,0 +1 @@ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql new file mode 100644 index 00000000000..4628b6ea26a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql @@ -0,0 +1,6 @@ +SELECT toUInt64(1) IN (1234567890, 2345678901, 3456789012, 4567890123, 5678901234, 6789012345, 7890123456, 8901234567, 9012345678, 123456789) AS x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x +FROM remote('127.0.0.1', system, one) SETTINGS max_query_size = 10000; From ec1b05bf540babae4836ec0c407365c6e73d11d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 00:38:52 +0300 Subject: [PATCH 50/88] dbms: added test [#METR-17606]. --- .../0_stateless/00211_query_formatting_aliases.reference | 2 ++ .../queries/0_stateless/00211_query_formatting_aliases.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference index dd143e07d02..b1cd860dcc6 100644 --- a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference @@ -1 +1,3 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +1 1 (2,2) +1 1 (2,2) diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql index 4628b6ea26a..e1006d89d82 100644 --- a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql @@ -4,3 +4,5 @@ SELECT toUInt64(1) IN (1234567890, 2345678901, 3456789012, 4567890123, 567890123 x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x FROM remote('127.0.0.1', system, one) SETTINGS max_query_size = 10000; + +SELECT 1 AS x, x, (SELECT 2 AS x, x) FROM remote('127.0.0.{1,2}', system.one) WHERE (3, 4) IN (SELECT 3 AS x, toUInt8(x + 1)); From ca3a36c3e16916059aed1135e2695082668cb092 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 02:46:15 +0300 Subject: [PATCH 51/88] dbms: addition to prev. revision [#METR-17606]. --- dbms/include/DB/Parsers/ASTFunction.h | 2 +- dbms/include/DB/Parsers/ASTIdentifier.h | 2 +- dbms/include/DB/Parsers/ASTLiteral.h | 2 +- dbms/include/DB/Parsers/ASTSubquery.h | 2 +- dbms/include/DB/Parsers/ASTWithAlias.h | 8 +++++--- dbms/src/Parsers/ASTFunction.cpp | 2 +- dbms/src/Parsers/ASTWithAlias.cpp | 4 ++-- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index c9857b20c83..74b72b25f8b 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -84,7 +84,7 @@ public: } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index adeb068f61f..9056e7dec58 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -44,7 +44,7 @@ public: } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << (settings.hilite ? hilite_identifier : ""); diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index 6ee40f78afd..17c5d87fd4e 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -28,7 +28,7 @@ public: ASTPtr clone() const override { return new ASTLiteral(*this); } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << apply_visitor(FieldVisitorToString(), value); } diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 9b044ddf0ec..7447a47350d 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -36,7 +36,7 @@ public: String getColumnName() const override { return getTreeID(); } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); std::string nl_or_nothing = settings.one_line ? "" : "\n"; diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 0ade6e26ce9..53a888baf32 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -7,6 +7,7 @@ namespace DB { + /** Базовый класс для AST, которые могут содержать алиас (идентификаторы, литералы, функции). */ class ASTWithAlias : public IAST @@ -21,14 +22,15 @@ public: String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } - /// Вызывает formatImplWithAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. + /// Вызывает formatImplWithoutAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final; - virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; + virtual void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; /// helper for setting aliases and chaining result to other functions -inline ASTPtr setAlias(ASTPtr ast, const String & alias) { +inline ASTPtr setAlias(ASTPtr ast, const String & alias) +{ ast->setAlias(alias); return ast; }; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 3c2591d3376..f014f13a6ef 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -5,7 +5,7 @@ namespace DB { -void ASTFunction::formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index e1319fcafea..97016f7eb17 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -8,7 +8,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (!alias.empty()) { /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. - if (!state.printed_asts_with_alias.emplace(frame.current_select, getID()).second) + if (!state.printed_asts_with_alias.emplace(frame.current_select, alias).second) { WriteBufferFromOStream wb(settings.ostr, 32); writeProbablyBackQuotedString(alias, wb); @@ -20,7 +20,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (frame.need_parens && !alias.empty()) settings.ostr <<'('; - formatImplWithAlias(settings, state, frame); + formatImplWithoutAlias(settings, state, frame); if (!alias.empty()) { From e24cad5123d41dd4c30f24356b6ec2e01db673d3 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 15:39:06 +0300 Subject: [PATCH 52/88] dbms: Server: Performance improvements. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 08f28a14be8..8aa6edf22fb 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -99,7 +99,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -109,7 +109,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } From 311a41e14f3ff189f90497de939e1bde4e10d14a Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 18:24:23 +0300 Subject: [PATCH 53/88] Merge --- .../AggregateFunctionUniq.h | 244 ++++++---- .../DB/Common/CombinedCardinalityEstimator.h | 254 ++++++----- dbms/include/DB/Common/HashTable/HashTable.h | 50 ++- dbms/include/DB/Common/HashTable/SmallTable.h | 50 +++ .../HyperLogLogWithSmallSetOptimization.h | 18 +- dbms/include/DB/Core/ErrorCodes.h | 1 + .../00211_aggregate_function_uniq.reference | 416 ++++++++++++++++++ .../00211_aggregate_function_uniq.sql | 35 ++ 8 files changed, 874 insertions(+), 194 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 146bb6a9394..8aa6edf22fb 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -25,32 +25,7 @@ namespace DB { - -template struct AggregateFunctionUniqTraits -{ - static UInt64 hash(T x) { return x; } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float32 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float64 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - +/// uniq struct AggregateFunctionUniqUniquesHashSetData { @@ -60,6 +35,7 @@ struct AggregateFunctionUniqUniquesHashSetData static String getName() { return "uniq"; } }; +/// uniqHLL12 template struct AggregateFunctionUniqHLL12Data @@ -79,6 +55,7 @@ struct AggregateFunctionUniqHLL12Data static String getName() { return "uniqHLL12"; } }; +/// uniqExact template struct AggregateFunctionUniqExactData @@ -122,7 +99,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +109,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -140,75 +117,172 @@ struct AggregateFunctionUniqCombinedData namespace detail { - /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. - * Используется для частичной специализации для добавления строк. - */ - template - struct OneAdder + +/** Хэширование 64-битных целочисленных значений в 32-битные. + * Источник: https://gist.github.com/badboy/6267743 + */ +template +struct Hash64To32; + +template +struct Hash64To32::value || std::is_same::value>::type> +{ + static UInt32 compute(T key) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + using U = typename std::make_unsigned::type; + auto x = static_cast(key); - template - struct OneAdder + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } +}; + +/** Хэш-функция для uniqCombined. + */ +template +struct CombinedCardinalityTraits +{ + static UInt32 hash(T key) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - /// Имейте ввиду, что вычисление приближённое. - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + return key; + } +}; - template - struct OneAdder > +template +struct CombinedCardinalityTraits::value || std::is_same::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - data.set.insert(static_cast &>(column).getData()[row_num]); - } + return Op::compute(key); }; +}; - template<> - struct OneAdder > +template +struct CombinedCardinalityTraits::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } +}; - UInt128 key; - SipHash hash; - hash.update(value.data, value.size); - hash.get128(key.first, key.second); - - data.set.insert(key); - } - }; - - template - struct OneAdder > +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - if (data.set.isMedium()) - data.set.insert(static_cast &>(column).getData()[row_num]); - else - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } +}; - template<> - struct OneAdder > +/** Хэш-функция для uniq. + */ +template struct AggregateFunctionUniqTraits +{ + static UInt64 hash(T x) { return x; } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float32 x) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float64 x) + { + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; + +/** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. + * Используется для частичной специализации для добавления строк. + */ +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) + { + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } +}; + +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) + { + /// Имейте ввиду, что вычисление приближённое. + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) + { + data.set.insert(static_cast &>(column).getData()[row_num]); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + + UInt128 key; + SipHash hash; + hash.update(value.data, value.size); + hash.get128(key.first, key.second); + + data.set.insert(key); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + } diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 43c11380668..00a01232b31 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include namespace DB @@ -11,11 +12,11 @@ namespace DB namespace details { -enum class ContainerType { SMALL, MEDIUM, LARGE }; +enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; -ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + UInt8 res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); } @@ -25,38 +26,41 @@ ContainerType max(const ContainerType & lhs, const ContainerType & rhs) * Для среднего - выделяется HashSet. * Для большого - выделяется HyperLogLog. */ -template +template +< + typename Key, + typename HashContainer, + UInt8 small_set_size_max, + UInt8 medium_set_power2_max, + UInt8 K, + typename Hash = IntHash32, + typename DenominatorType = float +> class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: using Small = SmallSet; using Medium = HashContainer; - using Large = HyperLogLogWithSmallSetOptimization; + using Large = HyperLogLogCounter; public: + CombinedCardinalityEstimator() + { + setContainerType(details::ContainerType::SMALL); + } + ~CombinedCardinalityEstimator() { - if (container_type == details::ContainerType::MEDIUM) - { - delete medium; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(medium)); - } - else if (container_type == details::ContainerType::LARGE) - { - delete large; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(large)); - } + destroy(); } void insert(Key value) { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) @@ -66,41 +70,43 @@ public: else { toMedium(); - medium->insert(value); + getContainer().insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size_max) - medium->insert(value); + auto & container = getContainer(); + if (container.size() < medium_set_size_max) + container.insert(value); else { toLarge(); - large->insert(value); + getContainer().insert(value); } } else if (container_type == details::ContainerType::LARGE) - large->insert(value); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + getContainer().insert(value); } UInt32 size() const { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return medium->size(); + return getContainer().size(); else if (container_type == details::ContainerType::LARGE) - return large->size(); + return getContainer().size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) { - details::ContainerType max_container_type = details::max(container_type, rhs.container_type); + auto container_type = getContainerType(); + auto max_container_type = details::max(container_type, rhs.getContainerType()); if (container_type != max_container_type) { @@ -110,41 +116,18 @@ public: toLarge(); } - if (container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - if (rhs.container_type == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.container_type == details::ContainerType::MEDIUM) - { - for (const auto & x : *rhs.medium) - insert(x); - } + for (const auto & x : rhs.getContainer()) + insert(x); } - else if (container_type == details::ContainerType::LARGE) - { - if (rhs.container_type == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.container_type == details::ContainerType::MEDIUM) - { - for (const auto & x : *rhs.medium) - insert(x); - } - else if (rhs.container_type == details::ContainerType::LARGE) - large->merge(*rhs.large); - } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getContainer().merge(rhs.getContainer()); } /// Можно вызывать только для пустого объекта. @@ -152,79 +135,96 @@ public: { UInt8 v; readBinary(v, in); - details::ContainerType t = static_cast(v); + auto container_type = static_cast(v); - if (t == details::ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.read(in); - else if (t == details::ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { toMedium(); - medium->read(in); + getContainer().read(in); } - else if (t == details::ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { toLarge(); - large->read(in); + getContainer().read(in); } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) { - Self other; - other.read(in); - merge(other); + auto container_type = getContainerType(); + + UInt8 v; + readBinary(v, in); + auto rhs_container_type = static_cast(v); + + auto max_container_type = details::max(container_type, rhs_container_type); + + if (container_type != max_container_type) + { + if (max_container_type == details::ContainerType::MEDIUM) + toMedium(); + else if (max_container_type == details::ContainerType::LARGE) + toLarge(); + } + + if (rhs_container_type == details::ContainerType::SMALL) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::MEDIUM) + { + typename Medium::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::LARGE) + getContainer().readAndMerge(in); } void write(DB::WriteBuffer & out) const { - UInt8 v = static_cast(container_type); - writeBinary(v, out); + auto container_type = getContainerType(); + writeBinary(static_cast(container_type), out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - medium->write(out); + getContainer().write(out); else if (container_type == details::ContainerType::LARGE) - large->write(out); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - } - - bool isMedium() const - { - return container_type == details::ContainerType::MEDIUM; + getContainer().write(out); } private: void toMedium() { - if (container_type != details::ContainerType::SMALL) + if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(medium)); - - Medium * tmp_medium = new Medium; + auto tmp_medium = std::make_unique(); for (const auto & x : small) tmp_medium->insert(x); - medium = tmp_medium; + new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - container_type = details::ContainerType::MEDIUM; + setContainerType(details::ContainerType::MEDIUM); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); } void toLarge() { + auto container_type = getContainerType(); + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(large)); - - Large * tmp_large = new Large; + auto tmp_large = std::make_unique(); if (container_type == details::ContainerType::SMALL) { @@ -233,30 +233,78 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *medium) + for (const auto & x : getContainer()) tmp_large->insert(x); + + destroy(); } - large = tmp_large; + new (&large) std::unique_ptr{ std::move(tmp_large) }; + + setContainerType(details::ContainerType::LARGE); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + } + + void NO_INLINE destroy() + { + auto container_type = getContainerType(); + + clearContainerType(); if (container_type == details::ContainerType::MEDIUM) { - delete medium; - medium = nullptr; - + medium.std::unique_ptr::~unique_ptr(); if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } + else if (container_type == details::ContainerType::LARGE) + { + large.std::unique_ptr::~unique_ptr(); + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } - container_type = details::ContainerType::LARGE; + template + inline T & getContainer() + { + return *reinterpret_cast(address & mask); + } + + template + inline const T & getContainer() const + { + return *reinterpret_cast(address & mask); + } + + void setContainerType(details::ContainerType t) + { + address |= static_cast(t); + } + + inline details::ContainerType getContainerType() const + { + return static_cast(address & ~mask); + } + + void clearContainerType() + { + address &= mask; } private: Small small; - Medium * medium = nullptr; - Large * large = nullptr; - const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; - details::ContainerType container_type = details::ContainerType::SMALL; + union + { + std::unique_ptr medium; + std::unique_ptr large; + UInt64 address = 0; + }; + static const UInt64 mask = 0xFFFFFFFC; + static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; } diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 67196746ae5..0b216e1ca0e 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -251,6 +251,7 @@ class HashTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; template friend class TwoLevelHashTable; @@ -429,6 +430,51 @@ public: free(); } + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return cell.getValue(); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; class iterator { @@ -757,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } @@ -781,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 10ec8479b93..c68963a4798 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -27,6 +27,7 @@ class SmallTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; typedef SmallTable Self; typedef Cell cell_type; @@ -66,6 +67,55 @@ public: typedef typename Cell::value_type value_type; + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + + if (size > capacity) + throw DB::Exception("Illegal size"); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return cell.getValue(); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; + class iterator { Self * container; diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 7932ddfb0e8..405f7c5ca12 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -114,10 +114,20 @@ public: void readAndMerge(DB::ReadBuffer & in) { - /// Немного не оптимально. - HyperLogLogWithSmallSetOptimization other; - other.read(in); - merge(other); + bool is_rhs_large; + readBinary(is_rhs_large, in); + + if (!isLarge() && is_rhs_large) + toLarge(); + + if (!is_rhs_large) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else + large->readAndMerge(in); } void write(DB::WriteBuffer & out) const diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 937b06d5ce0..8fae35ea601 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -283,6 +283,7 @@ namespace ErrorCodes INDEX_NOT_USED = 277, LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, + NO_AVAILABLE_DATA = 280, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference new file mode 100644 index 00000000000..64f3c19bb38 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -0,0 +1,416 @@ +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 159 +1 164 +3 165 +6 162 +7 160 +9 164 +10 81 +11 158 +13 161 +14 160 +17 163 +19 164 +20 159 +21 161 +22 159 +26 160 +31 164 +35 160 +36 161 +0 54571 +1 55013 +3 52912 +6 52353 +7 54011 +9 54138 +10 26870 +11 54554 +13 53951 +14 53396 +17 55227 +19 55115 +20 54370 +21 54268 +22 54620 +26 53394 +31 54151 +35 54328 +36 52997 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 161 +0.125 160 +0.5 164 +0.05 164 +0.143 162 +0.091 81 +0.056 163 +0.048 159 +0.083 158 +0.25 165 +1 159 +0.1 164 +0.028 160 +0.027 161 +0.031 164 +0.067 160 +0.043 159 +0.037 160 +0.071 161 +0.045 54268 +0.125 54011 +0.5 55013 +0.05 55115 +0.143 52353 +0.091 26870 +0.056 55227 +0.048 54370 +0.083 54554 +0.25 52912 +1 54571 +0.1 54138 +0.028 54328 +0.027 52997 +0.031 54151 +0.067 53396 +0.043 54620 +0.037 53394 +0.071 53951 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 164 +0.05 164 +0.25 165 +0.048 159 +0.091 81 +0.043 159 +0.071 161 +0.083 158 +0.125 160 +0.031 164 +0.143 162 +0.028 160 +0.067 160 +0.045 161 +0.027 161 +0.056 163 +0.037 160 +0.1 164 +1 159 +0.5 55013 +0.05 55115 +0.25 52912 +0.048 54370 +0.091 26870 +0.043 54620 +0.071 53951 +0.083 54554 +0.125 54011 +0.031 54151 +0.143 52353 +0.028 54328 +0.067 53396 +0.045 54268 +0.027 52997 +0.056 55227 +0.037 53394 +0.1 54138 +1 54571 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 158 +3 162 +6 163 +7 162 +9 162 +10 79 +11 162 +13 163 +14 160 +17 163 +19 158 +20 162 +21 157 +22 164 +26 162 +31 161 +35 162 +36 163 +0 54029 +1 53772 +3 53540 +6 54012 +7 53910 +9 52761 +10 26462 +11 52701 +13 54505 +14 53790 +17 54064 +19 55420 +20 56686 +21 52639 +22 54251 +26 53827 +31 53574 +35 55022 +36 53961 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54053 +6 54054 +7 54054 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54053 +22 54054 +26 54054 +31 54054 +35 54054 +36 54053 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54053 +0.125 54054 +0.5 54054 +0.05 54054 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54054 +0.083 54055 +0.25 54053 +1 54054 +0.1 54053 +0.028 54054 +0.027 54053 +0.031 54054 +0.067 54054 +0.043 54054 +0.037 54054 +0.071 54054 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54054 +0.25 54053 +0.048 54054 +0.091 27027 +0.043 54054 +0.071 54054 +0.083 54055 +0.125 54054 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54053 +0.027 54053 +0.056 54054 +0.037 54054 +0.1 54053 +1 54054 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql new file mode 100644 index 00000000000..2886daeb3b3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql @@ -0,0 +1,35 @@ +/* uniqHLL12 */ + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +/* uniqCombined */ + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From a68b3891f872fb1ca5d2e1fcb68e2e15b63ac9b2 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 19:59:15 +0300 Subject: [PATCH 54/88] dbms: Server: Updated functional tests. [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference index 64f3c19bb38..288258c7d81 100644 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -239,25 +239,25 @@ 31 162 35 162 36 162 -0 54054 -1 54054 -3 54053 -6 54054 -7 54054 -9 54053 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54053 -22 54054 -26 54054 -31 54054 -35 54054 -36 54053 +0 53988 +1 54083 +3 53994 +6 53948 +7 54209 +9 54112 +10 27000 +11 54058 +13 54158 +14 53926 +17 54094 +19 54127 +20 54065 +21 54207 +22 54056 +26 53982 +31 54156 +35 53960 +36 54076 0.125 1 0.5 1 0.05 1 @@ -291,25 +291,25 @@ 0.043 162 0.037 162 0.071 162 -0.045 54053 -0.125 54054 -0.5 54054 -0.05 54054 -0.143 54054 -0.091 27027 -0.056 54054 -0.048 54054 -0.083 54055 -0.25 54053 -1 54054 -0.1 54053 -0.028 54054 -0.027 54053 -0.031 54054 -0.067 54054 -0.043 54054 -0.037 54054 -0.071 54054 +0.045 54207 +0.125 54209 +0.5 54083 +0.05 54127 +0.143 53948 +0.091 27000 +0.056 54094 +0.048 54065 +0.083 54058 +0.25 53994 +1 53988 +0.1 54112 +0.028 53960 +0.027 54076 +0.031 54156 +0.067 53926 +0.043 54056 +0.037 53982 +0.071 54158 0.5 1 0.05 1 0.25 1 @@ -343,25 +343,25 @@ 0.037 162 0.1 163 1 162 -0.5 54054 -0.05 54054 -0.25 54053 -0.048 54054 -0.091 27027 -0.043 54054 -0.071 54054 -0.083 54055 -0.125 54054 -0.031 54054 -0.143 54054 -0.028 54054 -0.067 54054 -0.045 54053 -0.027 54053 -0.056 54054 -0.037 54054 -0.1 54053 -1 54054 +0.5 54083 +0.05 54127 +0.25 53994 +0.048 54065 +0.091 27000 +0.043 54056 +0.071 54158 +0.083 54058 +0.125 54209 +0.031 54156 +0.143 53948 +0.028 53960 +0.067 53926 +0.045 54207 +0.027 54076 +0.056 54094 +0.037 53982 +0.1 54112 +1 53988 1 1 3 1 6 1 @@ -395,22 +395,22 @@ 31 162 35 162 36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 +0 54195 +1 54086 +3 54127 +6 54173 +7 53969 +9 54257 +10 26985 +11 53880 +13 54105 +14 54043 +17 54176 +19 53913 +20 54088 +21 53991 +22 54112 +26 54136 +31 54074 +35 54153 +36 53999 From 1bf22c463b6ce99c66b08bcf273e279e93088e45 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 20:09:02 +0300 Subject: [PATCH 55/88] dbms: Server: Renamed functional test. [#METR-17276] --- ...ion_uniq.reference => 00212_aggregate_function_uniq.referemce} | 0 ...regate_function_uniq.sql => 00212_aggregate_function_uniq.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00211_aggregate_function_uniq.reference => 00212_aggregate_function_uniq.referemce} (100%) rename dbms/tests/queries/0_stateless/{00211_aggregate_function_uniq.sql => 00212_aggregate_function_uniq.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce similarity index 100% rename from dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.sql From ed2ec39899544eb9cc8e4d5fd42048130612fcdb Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 20:35:34 +0300 Subject: [PATCH 56/88] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/Common/HashTable/HashTable.h | 4 ++-- ...uniq.referemce => 00212_aggregate_function_uniq.reference} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename dbms/tests/queries/0_stateless/{00212_aggregate_function_uniq.referemce => 00212_aggregate_function_uniq.reference} (100%) diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 0b216e1ca0e..02c157035a8 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -803,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(x.getValue()); + insert(x); } } @@ -827,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(x.getValue()); + insert(x); } } diff --git a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference From 071c1af212082e5fb5e05e5a12f501448d06b406 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Aug 2015 08:10:43 +0300 Subject: [PATCH 57/88] dbms: fixed error [#METR-17606]. --- dbms/include/DB/Parsers/ASTCreateQuery.h | 7 ++++--- dbms/include/DB/Parsers/ASTJoin.h | 4 +++- dbms/include/DB/Parsers/ASTSubquery.h | 7 ++++--- dbms/include/DB/Parsers/IAST.h | 2 +- dbms/src/Parsers/ASTFunction.cpp | 2 -- dbms/src/Parsers/ASTSelectQuery.cpp | 4 +++- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index 57cfc1a35b2..b76cfedc2d2 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -30,7 +30,7 @@ public: ASTCreateQuery() = default; ASTCreateQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; }; @@ -87,8 +87,9 @@ protected: if (columns) { settings.ostr << (settings.one_line ? " (" : "\n("); - ++frame.indent; - columns->formatImpl(settings, state, frame); + FormatStateStacked frame_nested = frame; + ++frame_nested.indent; + columns->formatImpl(settings, state, frame_nested); settings.ostr << (settings.one_line ? ")" : "\n)"); } diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 029d4f49350..859b7b3cfcf 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -106,7 +106,9 @@ protected: settings.ostr << "JOIN " << (settings.hilite ? hilite_none : ""); - table->formatImpl(settings, state, frame); + FormatStateStacked frame_with_indent = frame; + ++frame_with_indent.indent; + table->formatImpl(settings, state, frame_with_indent); if (kind != ASTJoin::Cross) { diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 7447a47350d..55d7621129f 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -42,9 +42,10 @@ protected: std::string nl_or_nothing = settings.one_line ? "" : "\n"; settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; - FormatStateStacked frame_dont_need_parens = frame; - frame_dont_need_parens.need_parens = false; - children[0]->formatImpl(settings, state, frame_dont_need_parens); + FormatStateStacked frame_nested = frame; + frame_nested.need_parens = false; + ++frame_nested.indent; + children[0]->formatImpl(settings, state, frame_nested); settings.ostr << nl_or_nothing << indent_str << ")"; } }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 2f3bc41db60..e92b0877c7a 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -164,7 +164,7 @@ public: /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. struct FormatStateStacked { - bool indent = 0; + UInt8 indent = 0; bool need_parens = false; const IAST * current_select = nullptr; }; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index f014f13a6ef..8a4b1083bc2 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -39,8 +39,6 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (name == "negate" && typeid_cast(&*arguments->children[0])) settings.ostr << ' '; - FormatStateStacked nested_frame; - nested_frame.need_parens = true; arguments->formatImpl(settings, state, nested_need_parens); written = true; } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 35c4214e75c..fbb22ad0dd0 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -248,7 +248,9 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F else s.ostr << "\n" << indent_str << "(\n"; - table->formatImpl(s, state, frame); + FormatStateStacked frame_with_indent = frame; + ++frame_with_indent.indent; + table->formatImpl(s, state, frame_with_indent); if (s.one_line) s.ostr << ")"; From 753a90b9302cac0f3e8e87c0009dcf623c5d5e50 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 10 Aug 2015 13:05:03 +0300 Subject: [PATCH 58/88] dbms: Server: Deleted obsolete files. [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 416 ------------------ .../00211_aggregate_function_uniq.sql | 35 -- 2 files changed, 451 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference delete mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference deleted file mode 100644 index 64f3c19bb38..00000000000 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference +++ /dev/null @@ -1,416 +0,0 @@ -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 159 -1 164 -3 165 -6 162 -7 160 -9 164 -10 81 -11 158 -13 161 -14 160 -17 163 -19 164 -20 159 -21 161 -22 159 -26 160 -31 164 -35 160 -36 161 -0 54571 -1 55013 -3 52912 -6 52353 -7 54011 -9 54138 -10 26870 -11 54554 -13 53951 -14 53396 -17 55227 -19 55115 -20 54370 -21 54268 -22 54620 -26 53394 -31 54151 -35 54328 -36 52997 -0.125 1 -0.5 1 -0.05 1 -0.143 1 -0.056 1 -0.048 2 -0.083 1 -0.25 1 -0.1 1 -0.028 1 -0.027 1 -0.031 1 -0.067 1 -0.037 1 -0.045 161 -0.125 160 -0.5 164 -0.05 164 -0.143 162 -0.091 81 -0.056 163 -0.048 159 -0.083 158 -0.25 165 -1 159 -0.1 164 -0.028 160 -0.027 161 -0.031 164 -0.067 160 -0.043 159 -0.037 160 -0.071 161 -0.045 54268 -0.125 54011 -0.5 55013 -0.05 55115 -0.143 52353 -0.091 26870 -0.056 55227 -0.048 54370 -0.083 54554 -0.25 52912 -1 54571 -0.1 54138 -0.028 54328 -0.027 52997 -0.031 54151 -0.067 53396 -0.043 54620 -0.037 53394 -0.071 53951 -0.5 1 -0.05 1 -0.25 1 -0.048 2 -0.083 1 -0.125 1 -0.031 1 -0.143 1 -0.028 1 -0.067 1 -0.027 1 -0.056 1 -0.037 1 -0.1 1 -0.5 164 -0.05 164 -0.25 165 -0.048 159 -0.091 81 -0.043 159 -0.071 161 -0.083 158 -0.125 160 -0.031 164 -0.143 162 -0.028 160 -0.067 160 -0.045 161 -0.027 161 -0.056 163 -0.037 160 -0.1 164 -1 159 -0.5 55013 -0.05 55115 -0.25 52912 -0.048 54370 -0.091 26870 -0.043 54620 -0.071 53951 -0.083 54554 -0.125 54011 -0.031 54151 -0.143 52353 -0.028 54328 -0.067 53396 -0.045 54268 -0.027 52997 -0.056 55227 -0.037 53394 -0.1 54138 -1 54571 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 158 -3 162 -6 163 -7 162 -9 162 -10 79 -11 162 -13 163 -14 160 -17 163 -19 158 -20 162 -21 157 -22 164 -26 162 -31 161 -35 162 -36 163 -0 54029 -1 53772 -3 53540 -6 54012 -7 53910 -9 52761 -10 26462 -11 52701 -13 54505 -14 53790 -17 54064 -19 55420 -20 56686 -21 52639 -22 54251 -26 53827 -31 53574 -35 55022 -36 53961 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 162 -3 162 -6 162 -7 163 -9 163 -10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 -26 162 -31 162 -35 162 -36 162 -0 54054 -1 54054 -3 54053 -6 54054 -7 54054 -9 54053 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54053 -22 54054 -26 54054 -31 54054 -35 54054 -36 54053 -0.125 1 -0.5 1 -0.05 1 -0.143 1 -0.056 1 -0.048 2 -0.083 1 -0.25 1 -0.1 1 -0.028 1 -0.027 1 -0.031 1 -0.067 1 -0.037 1 -0.045 162 -0.125 163 -0.5 162 -0.05 162 -0.143 162 -0.091 81 -0.056 162 -0.048 162 -0.083 163 -0.25 162 -1 162 -0.1 163 -0.028 162 -0.027 162 -0.031 162 -0.067 162 -0.043 162 -0.037 162 -0.071 162 -0.045 54053 -0.125 54054 -0.5 54054 -0.05 54054 -0.143 54054 -0.091 27027 -0.056 54054 -0.048 54054 -0.083 54055 -0.25 54053 -1 54054 -0.1 54053 -0.028 54054 -0.027 54053 -0.031 54054 -0.067 54054 -0.043 54054 -0.037 54054 -0.071 54054 -0.5 1 -0.05 1 -0.25 1 -0.048 2 -0.083 1 -0.125 1 -0.031 1 -0.143 1 -0.028 1 -0.067 1 -0.027 1 -0.056 1 -0.037 1 -0.1 1 -0.5 162 -0.05 162 -0.25 162 -0.048 162 -0.091 81 -0.043 162 -0.071 162 -0.083 163 -0.125 163 -0.031 162 -0.143 162 -0.028 162 -0.067 162 -0.045 162 -0.027 162 -0.056 162 -0.037 162 -0.1 163 -1 162 -0.5 54054 -0.05 54054 -0.25 54053 -0.048 54054 -0.091 27027 -0.043 54054 -0.071 54054 -0.083 54055 -0.125 54054 -0.031 54054 -0.143 54054 -0.028 54054 -0.067 54054 -0.045 54053 -0.027 54053 -0.056 54054 -0.037 54054 -0.1 54053 -1 54054 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 162 -3 162 -6 162 -7 163 -9 163 -10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 -26 162 -31 162 -35 162 -36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql deleted file mode 100644 index 2886daeb3b3..00000000000 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql +++ /dev/null @@ -1,35 +0,0 @@ -/* uniqHLL12 */ - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -/* uniqCombined */ - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From 053ea72bfe9b451c6e6b497bffb2d979cf9aa173 Mon Sep 17 00:00:00 2001 From: Anton Tikhonov Date: Thu, 6 Aug 2015 22:01:57 +0300 Subject: [PATCH 59/88] Upgrade number parser in JSON (and ReadHelpers) [#METR-17621] --- dbms/include/DB/IO/ReadHelpers.h | 57 ++++++++++++++------------ dbms/src/IO/tests/parse_nums_check.cpp | 38 +++++++++++++++++ 2 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 dbms/src/IO/tests/parse_nums_check.cpp diff --git a/dbms/include/DB/IO/ReadHelpers.h b/dbms/include/DB/IO/ReadHelpers.h index 12302a33436..1676c9e40ce 100644 --- a/dbms/include/DB/IO/ReadHelpers.h +++ b/dbms/include/DB/IO/ReadHelpers.h @@ -129,18 +129,20 @@ void readIntText(T & x, ReadBuffer & buf) if (buf.eof()) throwReadAfterEOF(); + if (*buf.position() == '-') + { + if (!std::is_signed::value) + return; + negative = true; + ++buf.position(); + } + else if (*buf.position() == '+') + ++buf.position(); + while (!buf.eof()) { switch (*buf.position()) { - case '+': - break; - case '-': - if (std::is_signed::value) - negative = true; - else - return; - break; case '0': case '1': case '2': @@ -216,6 +218,9 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf) template void readFloatText(T & x, ReadBuffer & buf) { + /// Если вдруг тут перед каждым return надо будет еще что-то делать, типа домножать на экспоненту -- это можно сделать тут. +#define SCOPE_GUARDED_RETURN do { if (negative) x = -x; return; } while (0) + bool negative = false; x = 0; bool after_point = false; @@ -224,16 +229,21 @@ void readFloatText(T & x, ReadBuffer & buf) if (buf.eof()) throwReadAfterEOF(); + if (*buf.position() == '-') + { + negative = true; + ++buf.position(); + } + else if (*buf.position() == '+') + ++buf.position(); + while (!buf.eof()) { switch (*buf.position()) { - case '+': - break; - case '-': - negative = true; - break; case '.': + if (after_point) + SCOPE_GUARDED_RETURN; after_point = true; break; case '0': @@ -264,24 +274,18 @@ void readFloatText(T & x, ReadBuffer & buf) Int32 exponent = 0; readIntText(exponent, buf); x *= exp10(exponent); - if (negative) - x = -x; - return; + SCOPE_GUARDED_RETURN; } case 'i': ++buf.position(); assertString("nf", buf); x = std::numeric_limits::infinity(); - if (negative) - x = -x; - return; + SCOPE_GUARDED_RETURN; case 'I': ++buf.position(); assertString("NF", buf); x = std::numeric_limits::infinity(); - if (negative) - x = -x; - return; + SCOPE_GUARDED_RETURN; case 'n': ++buf.position(); assertString("an", buf); @@ -293,14 +297,13 @@ void readFloatText(T & x, ReadBuffer & buf) x = std::numeric_limits::quiet_NaN(); return; default: - if (negative) - x = -x; - return; + SCOPE_GUARDED_RETURN; } ++buf.position(); } - if (negative) - x = -x; + SCOPE_GUARDED_RETURN; + +#undef SCOPE_GUARDED_RETURN } diff --git a/dbms/src/IO/tests/parse_nums_check.cpp b/dbms/src/IO/tests/parse_nums_check.cpp new file mode 100644 index 00000000000..3bde803aa9f --- /dev/null +++ b/dbms/src/IO/tests/parse_nums_check.cpp @@ -0,0 +1,38 @@ +#include +#include + +#include + +int main() +{ + const char input[] = "1 1.0 10.5 115e2 -5 -5.0 10- 7+8 90-3 .5 127.0.0.1 +1 +1-1"; + DB::ReadBuffer buf(const_cast(input), strlen(input), 0); + + Int64 i; + double f; + double Epsilon = 1e-10; + int t = 0; + +#define CHECK(x, y) do { DB::readText(x, buf); ++t; if (((x-y) > Epsilon) || (y-x) > Epsilon) return t; buf.ignore();} while (0); + CHECK(i, 1); + CHECK(f, 1.0f); + CHECK(f, 10.5f); + CHECK(f, 115e2); + CHECK(i, -5); + CHECK(f, -5); + CHECK(i, 10); + buf.ignore(); + CHECK(i, 7); + buf.ignore(2); + CHECK(i, 90); + buf.ignore(2); + /// Интересный случай: хотим ли мы, чтобы .5 парсилось как 0.5? Вроде бы это уместно. + CHECK(f, 0.5f); + CHECK(f, 127); + buf.ignore(4); // "0.1 " + CHECK(i, 1); + CHECK(i, 1); +#undef CHECK + + return 0; +} From 99f0783b04895cdb02601f90d31704541c549d44 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 10 Aug 2015 16:47:43 +0300 Subject: [PATCH 60/88] dbms: Server: Fixed pointer computation + simplified code. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 00a01232b31..4e8669c0af1 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -209,8 +209,7 @@ private: for (const auto & x : small) tmp_medium->insert(x); - new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - + medium = tmp_medium.release(); setContainerType(details::ContainerType::MEDIUM); if (current_memory_tracker) @@ -239,8 +238,7 @@ private: destroy(); } - new (&large) std::unique_ptr{ std::move(tmp_large) }; - + large = tmp_large.release(); setContainerType(details::ContainerType::LARGE); if (current_memory_tracker) @@ -256,13 +254,17 @@ private: if (container_type == details::ContainerType::MEDIUM) { - medium.std::unique_ptr::~unique_ptr(); + delete medium; + medium = nullptr; + if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } else if (container_type == details::ContainerType::LARGE) { - large.std::unique_ptr::~unique_ptr(); + delete large; + large = nullptr; + if (current_memory_tracker) current_memory_tracker->free(sizeof(large)); } @@ -282,6 +284,7 @@ private: void setContainerType(details::ContainerType t) { + address &= mask; address |= static_cast(t); } @@ -299,11 +302,11 @@ private: Small small; union { - std::unique_ptr medium; - std::unique_ptr large; + Medium * medium; + Large * large; UInt64 address = 0; }; - static const UInt64 mask = 0xFFFFFFFC; + static const UInt64 mask = 0xFFFFFFFFFFFFFFFC; static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; From 9e1486bfdeed3b29c3d7101af7aeef425ccda613 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 11 Aug 2015 20:31:31 +0300 Subject: [PATCH 61/88] =?UTF-8?q?dbms:=20Server:=20Fixed=20interaction=20w?= =?UTF-8?q?ith=20=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88?= =?UTF-8?q?=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88.=20[#METR-17276]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 2 +- dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 4e8669c0af1..c6a2ff2d119 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -34,7 +34,7 @@ template UInt8 medium_set_power2_max, UInt8 K, typename Hash = IntHash32, - typename DenominatorType = float + typename DenominatorType = double > class CombinedCardinalityEstimator { diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 405f7c5ca12..e95811ce27b 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -16,7 +16,7 @@ template < UInt8 small_set_size, UInt8 K, typename Hash = IntHash32, - typename DenominatorType = float> + typename DenominatorType = double> class HyperLogLogWithSmallSetOptimization { private: From 6448560938c39c615f4c67203127bc343990e98c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:11:54 +0300 Subject: [PATCH 62/88] dbms: allowed to specify 'interserver_http_host' in metrika.xml for metrika package [#MTRSADMIN-1483]. --- dbms/src/Server/Server.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 3c2e42a5ccb..a2bc499db88 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -485,15 +485,12 @@ int Server::main(const std::vector & args) if (config().has("interserver_http_port")) { - String this_host; - if (config().has("interserver_http_host")) - { - this_host = config().getString("interserver_http_host"); - } - else + String this_host = config().getString("interserver_http_host", ""); + + if (this_host.empty()) { this_host = getFQDNOrHostName(); - LOG_DEBUG(log, "Configuration parameter 'interserver_http_host' doesn't exist. Will use '" + this_host + "' as replica host."); + LOG_DEBUG(log, "Configuration parameter 'interserver_http_host' doesn't exist or exists and empty. Will use '" + this_host + "' as replica host."); } String port_str = config().getString("interserver_http_port"); From 8d8fa9d0fdea89e69d9fe8f0a0eb9bb22178d724 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:29:44 +0300 Subject: [PATCH 63/88] dbms: more logging in MySQLDictionarySource [#METR-17508]. --- dbms/include/DB/Dictionaries/MySQLDictionarySource.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index c0ee3974d49..e791246c35c 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -65,6 +65,8 @@ public: } private: + Logger * log = &Logger::get("MySQLDictionarySource"); + mysqlxx::DateTime getLastModification() const { const auto Update_time_idx = 12; @@ -74,6 +76,9 @@ private: { auto connection = pool.Get(); auto query = connection->query("SHOW TABLE STATUS LIKE '%" + strconvert::escaped_for_like(table) + "%';"); + + LOG_TRACE(log, query.str()); + auto result = query.use(); if (auto row = result.fetch()) @@ -144,6 +149,8 @@ private: writeChar(';', out); } + LOG_TRACE(log, query); + return query; } From f39ad593f595fdb6902171239bde44b1cf67057b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:32:27 +0300 Subject: [PATCH 64/88] dbms: added optional property 'require_nonempty' for external dictionaries [#METR-17508]. --- dbms/include/DB/Core/ErrorCodes.h | 1 + dbms/include/DB/Dictionaries/FlatDictionary.h | 11 ++++++++--- .../DB/Dictionaries/HashedDictionary.h | 11 ++++++++--- .../DB/Dictionaries/RangeHashedDictionary.h | 11 ++++++++--- dbms/src/Interpreters/DictionaryFactory.cpp | 19 ++++++++++++------- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 8fae35ea601..3336d80fcf4 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -284,6 +284,7 @@ namespace ErrorCodes LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, NO_AVAILABLE_DATA = 280, + DICTIONARY_IS_EMPTY = 281, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 1ae5b976fde..b1267bc618f 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -20,9 +20,10 @@ class FlatDictionary final : public IDictionary { public: FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -40,7 +41,7 @@ public: } FlatDictionary(const FlatDictionary & other) - : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -198,6 +199,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -348,6 +352,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 08aad57d63a..e356808fec5 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -18,9 +18,10 @@ class HashedDictionary final : public IDictionary { public: HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -38,7 +39,7 @@ public: } HashedDictionary(const HashedDictionary & other) - : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -196,6 +197,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -334,6 +338,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h index 0435baecc93..a00208c5b48 100644 --- a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h +++ b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h @@ -19,9 +19,10 @@ class RangeHashedDictionary final : public IDictionaryBase public: RangeHashedDictionary( const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime) + const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -39,7 +40,7 @@ public: } RangeHashedDictionary(const RangeHashedDictionary & other) - : RangeHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : RangeHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -218,6 +219,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -410,6 +414,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/src/Interpreters/DictionaryFactory.cpp b/dbms/src/Interpreters/DictionaryFactory.cpp index 23a434c6eef..688fe114f20 100644 --- a/dbms/src/Interpreters/DictionaryFactory.cpp +++ b/dbms/src/Interpreters/DictionaryFactory.cpp @@ -31,6 +31,8 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + const auto & layout_type = keys.front(); if ("range_hashed" == layout_type) @@ -41,7 +43,7 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab ErrorCodes::BAD_ARGUMENTS }; - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else { @@ -49,16 +51,15 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab throw Exception{ "Elements .structure.range_min and .structure.range_max should be defined only " "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS - }; + ErrorCodes::BAD_ARGUMENTS}; if ("flat" == layout_type) { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else if ("hashed" == layout_type) { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else if ("cache" == layout_type) { @@ -66,8 +67,12 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab if (size == 0) throw Exception{ "Dictionary of layout 'cache' cannot have 0 cells", - ErrorCodes::TOO_SMALL_BUFFER_SIZE - }; + ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + if (require_nonempty) + throw Exception{ + "Dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); } From 0d6f4ffae1af41ee0f3e01424e9c85f32a87851e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 04:18:30 +0300 Subject: [PATCH 65/88] dbms: fixed error with dictionaries [#METR-17666]. --- dbms/include/DB/Functions/FunctionsDictionaries.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index d49a8ed8180..cf7eaea60ff 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -862,7 +862,7 @@ private: dict->getString(attr_name, ids, out.get()); block.getByPosition(result).column = new ColumnConst{ - id_col->size(), out->getDataAtWithTerminatingZero(0).toString() + id_col->size(), out->getDataAt(0).toString() }; } else @@ -967,7 +967,7 @@ private: dictionary->getString(attr_name, ids, dates, out.get()); block.getByPosition(result).column = new ColumnConst{ - id_col->size(), out->getDataAtWithTerminatingZero(0).toString() + id_col->size(), out->getDataAt(0).toString() }; } else From dea3c8b8a4d84dd482e29d1f9bbc18bec7a48f4f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 06:57:32 +0300 Subject: [PATCH 66/88] dbms: external dictionaries: fixed bunch of errors; added optional parameter 'dont_check_update_time' for MySQL dictionary source [#METR-17508]. --- dbms/include/DB/Core/Exception.h | 4 +- .../DB/Dictionaries/FileDictionarySource.h | 3 +- .../DB/Dictionaries/MySQLDictionarySource.h | 46 +++++++++++---- .../DB/Interpreters/ExternalDictionaries.h | 12 +++- dbms/src/Core/Exception.cpp | 8 +-- .../src/Interpreters/ExternalDictionaries.cpp | 59 ++++++------------- 6 files changed, 70 insertions(+), 62 deletions(-) diff --git a/dbms/include/DB/Core/Exception.h b/dbms/include/DB/Core/Exception.h index 3d28e412137..e9d8ec3c737 100644 --- a/dbms/include/DB/Core/Exception.h +++ b/dbms/include/DB/Core/Exception.h @@ -28,8 +28,8 @@ ExceptionPtr cloneCurrentException(); /** Попробовать записать исключение в лог (и забыть про него). * Можно использовать в деструкторах в блоке catch (...). */ -void tryLogCurrentException(const char * log_name); -void tryLogCurrentException(Poco::Logger * logger); +void tryLogCurrentException(const char * log_name, const std::string & start_of_message = ""); +void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = ""); std::string getCurrentExceptionMessage(bool with_stacktrace); diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 41e55f64de5..a2f9b0d3c9a 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -20,8 +20,7 @@ class FileDictionarySource final : public IDictionarySource public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, const Context & context) - : filename{filename}, format{format}, sample_block{sample_block}, context(context), - last_modification{getLastModification()} + : filename{filename}, format{format}, sample_block{sample_block}, context(context) {} FileDictionarySource(const FileDictionarySource & other) diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index e791246c35c..abf58b2767f 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -23,10 +23,10 @@ public: db{config.getString(config_prefix + ".db", "")}, table{config.getString(config_prefix + ".table")}, where{config.getString(config_prefix + ".where", "")}, + dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)}, sample_block{sample_block}, pool{config, config_prefix}, - load_all_query{composeLoadAllQuery()}, - last_modification{getLastModification()} + load_all_query{composeLoadAllQuery()} {} /// copy-constructor is provided in order to support cloneability @@ -35,6 +35,7 @@ public: db{other.db}, table{other.table}, where{other.where}, + dont_check_update_time{other.dont_check_update_time}, sample_block{other.sample_block}, pool{other.pool}, load_all_query{other.load_all_query}, last_modification{other.last_modification} @@ -43,18 +44,27 @@ public: BlockInputStreamPtr loadAll() override { last_modification = getLastModification(); + + LOG_TRACE(log, load_all_query); return new MySQLBlockInputStream{pool.Get(), load_all_query, sample_block, max_block_size}; } BlockInputStreamPtr loadIds(const std::vector & ids) override { - last_modification = getLastModification(); - const auto query = composeLoadIdsQuery(ids); + /// Здесь не логгируем и не обновляем время модификации, так как запрос может быть большим, и часто задаваться. + const auto query = composeLoadIdsQuery(ids); return new MySQLBlockInputStream{pool.Get(), query, sample_block, max_block_size}; } - bool isModified() const override { return getLastModification() > last_modification; } + bool isModified() const override + { + if (dont_check_update_time) + return true; + + return getLastModification() > last_modification; + } + bool supportsSelectiveLoad() const override { return true; } DictionarySourcePtr clone() const override { return std::make_unique(*this); } @@ -69,28 +79,43 @@ private: mysqlxx::DateTime getLastModification() const { - const auto Update_time_idx = 12; mysqlxx::DateTime update_time{std::time(nullptr)}; + if (dont_check_update_time) + return update_time; + try { auto connection = pool.Get(); - auto query = connection->query("SHOW TABLE STATUS LIKE '%" + strconvert::escaped_for_like(table) + "%';"); + auto query = connection->query("SHOW TABLE STATUS LIKE '" + strconvert::escaped_for_like(table) + "'"); LOG_TRACE(log, query.str()); auto result = query.use(); + size_t fetched_rows = 0; if (auto row = result.fetch()) { - const auto & update_time_value = row[Update_time_idx]; + ++fetched_rows; + const auto UPDATE_TIME_IDX = 12; + const auto & update_time_value = row[UPDATE_TIME_IDX]; if (!update_time_value.isNull()) + { update_time = update_time_value.getDateTime(); + LOG_TRACE(log, "Got update time: " << update_time); + } /// fetch remaining rows to avoid "commands out of sync" error - while (auto row = result.fetch()); + while (auto row = result.fetch()) + ++fetched_rows; } + + if (0 == fetched_rows) + LOG_ERROR(log, "Cannot find table in SHOW TABLE STATUS result."); + + if (fetched_rows > 1) + LOG_ERROR(log, "Found more than one table in SHOW TABLE STATUS result."); } catch (...) { @@ -149,8 +174,6 @@ private: writeChar(';', out); } - LOG_TRACE(log, query); - return query; } @@ -216,6 +239,7 @@ private: const std::string db; const std::string table; const std::string where; + const bool dont_check_update_time; Block sample_block; mutable mysqlxx::PoolWithFailover pool; const std::string load_all_query; diff --git a/dbms/include/DB/Interpreters/ExternalDictionaries.h b/dbms/include/DB/Interpreters/ExternalDictionaries.h index f3747a52cd6..cf2fbe36f10 100644 --- a/dbms/include/DB/Interpreters/ExternalDictionaries.h +++ b/dbms/include/DB/Interpreters/ExternalDictionaries.h @@ -57,9 +57,19 @@ private: std::uint64_t error_count; }; + /** Имя словаря -> словарь. + */ std::unordered_map dictionaries; - std::unordered_map update_times; + + /** Здесь находятся словари, которых ещё ни разу не удалось загрузить. + * В dictionaries они тоже присутствуют, но с нулевым указателем dict. + */ std::unordered_map failed_dictionaries; + + /** И для обычных и для failed_dictionaries. + */ + std::unordered_map update_times; + std::mt19937_64 rnd_engine{getSeed()}; Context & context; diff --git a/dbms/src/Core/Exception.cpp b/dbms/src/Core/Exception.cpp index 789af7beeae..6c2c52baa72 100644 --- a/dbms/src/Core/Exception.cpp +++ b/dbms/src/Core/Exception.cpp @@ -52,16 +52,16 @@ inline std::string demangle(const char * const mangled, int & status) return demangled; } -void tryLogCurrentException(const char * log_name) +void tryLogCurrentException(const char * log_name, const std::string & start_of_message) { - tryLogCurrentException(&Logger::get(log_name)); + tryLogCurrentException(&Logger::get(log_name), start_of_message); } -void tryLogCurrentException(Poco::Logger * logger) +void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message) { try { - LOG_ERROR(logger, getCurrentExceptionMessage(true)); + LOG_ERROR(logger, start_of_message << (start_of_message.empty() ? "" : ": ") << getCurrentExceptionMessage(true)); } catch (...) { diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 0cdb6713c7f..4a84afa97d1 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -64,7 +64,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) try { auto dict_ptr = failed_dictionary.second.dict->clone(); - if (dict_ptr->getCreationException()) + if (const auto exception_ptr = dict_ptr->getCreationException()) { /// recalculate next attempt time std::uniform_int_distribution distribution( @@ -72,10 +72,11 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) failed_dictionary.second.next_attempt_time = std::chrono::system_clock::now() + std::chrono::seconds{ - std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine)) - }; + std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine))}; ++failed_dictionary.second.error_count; + + std::rethrow_exception(exception_ptr); } else { @@ -99,7 +100,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) } catch (...) { - LOG_ERROR(log, "Failed reloading " << name << " dictionary due to unexpected error"); + tryLogCurrentException(log, "Failed reloading '" + name + "' dictionary"); } } @@ -114,6 +115,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) try { + /// Если словарь не удалось ни разу загрузить или даже не удалось инициализировать из конфига. if (!dictionary.second.dict) continue; @@ -144,6 +146,10 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { /// create new version of dictionary auto new_version = current->clone(); + + if (const auto exception_ptr = new_version->getCreationException()) + std::rethrow_exception(exception_ptr); + dictionary.second.dict->set(new_version.release()); } } @@ -155,25 +161,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { dictionary.second.exception = std::current_exception(); - try - { - throw; - } - catch (const Poco::Exception & e) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually. " << e.displayText()); - } - catch (const std::exception & e) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually. " << e.what()); - } - catch (...) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually."); - } + tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version."); } } } @@ -235,6 +223,8 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const throw std::runtime_error{"Overriding dictionary from file " + dict_it->second.origin}; auto dict_ptr = DictionaryFactory::instance().create(name, *config, key, context); + + /// Если словарь не удалось загрузить. if (const auto exception_ptr = dict_ptr->getCreationException()) { const auto failed_dict_it = failed_dictionaries.find(name); @@ -292,6 +282,9 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const { if (!name.empty()) { + /// Если для словаря не удалось загрузить данные или даже не удалось инициализировать из конфига. + /// - всё-равно вставляем информацию в dictionaries, с нулевым указателем dict. + const std::lock_guard lock{dictionaries_mutex}; const auto exception_ptr = std::current_exception(); @@ -302,25 +295,7 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const dict_it->second.exception = exception_ptr; } - try - { - throw; - } - catch (const Poco::Exception & e) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually. " << e.displayText()); - } - catch (const std::exception & e) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually. " << e.what()); - } - catch (...) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually."); - } + tryLogCurrentException(log, "Cannot create external dictionary '" + name + "' from config path " + config_path); /// propagate exception if (throw_on_error) From 18ed0b2829c73d9e7a31639088bbe7ac5c40cedd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 07:21:10 +0300 Subject: [PATCH 67/88] dbms: external dictionaries: fixed errors [#METR-17508]. --- dbms/include/DB/Dictionaries/CacheDictionary.h | 6 +++--- dbms/include/DB/Dictionaries/FlatDictionary.h | 2 +- dbms/include/DB/Dictionaries/HashedDictionary.h | 2 +- dbms/include/DB/Dictionaries/RangeHashedDictionary.h | 2 +- dbms/src/Interpreters/ExternalDictionaries.cpp | 2 +- libs/libmysqlxx/src/PoolWithFailover.cpp | 4 +--- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index 07db7c0dda2..15bbe05cb19 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -620,9 +620,9 @@ private: mutable std::mt19937_64 rnd_engine{getSeed()}; mutable std::size_t bytes_allocated = 0; - mutable std::atomic element_count{}; - mutable std::atomic hit_count{}; - mutable std::atomic query_count{}; + mutable std::atomic element_count{0}; + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; const std::chrono::time_point creation_time = std::chrono::system_clock::now(); }; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index b1267bc618f..0c1c2fb33e5 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -361,7 +361,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index e356808fec5..a73f8869f81 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -347,7 +347,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count{}; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h index a00208c5b48..888173b8940 100644 --- a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h +++ b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h @@ -422,7 +422,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count{}; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 4a84afa97d1..0e4525efb9a 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -161,7 +161,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { dictionary.second.exception = std::current_exception(); - tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version."); + tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version"); } } } diff --git a/libs/libmysqlxx/src/PoolWithFailover.cpp b/libs/libmysqlxx/src/PoolWithFailover.cpp index c98b42036d2..5fe4c64dfdf 100644 --- a/libs/libmysqlxx/src/PoolWithFailover.cpp +++ b/libs/libmysqlxx/src/PoolWithFailover.cpp @@ -13,10 +13,8 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & cfg cfg.keys(config_name, replica_keys); for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = replica_keys.begin(); it != replica_keys.end(); ++it) { - if (!(*it == "port" || *it == "user" || *it == "password" || *it == "db" || *it == "table")) + if (*it == "replica") /// На том же уровне могут быть другие параметры. { - if (it->size() < std::string("replica").size() || it->substr(0, std::string("replica").size()) != "replica") - throw Poco::Exception("Unknown element in config: " + *it + ", expected replica"); std::string replica_name = config_name + "." + *it; Replica replica(new Pool(cfg, replica_name, default_connections, max_connections, config_name.c_str()), cfg.getInt(replica_name + ".priority", 0)); From 3ff1a857fe6019dcc99ff4373ad5e90de13ac173 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 12 Aug 2015 19:26:53 +0300 Subject: [PATCH 68/88] dbms: Server: Use another hash function. [#METR-17276] --- .../AggregateFunctionUniq.h | 45 +++---- .../00212_aggregate_function_uniq.reference | 114 +++++++++--------- 2 files changed, 71 insertions(+), 88 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 8aa6edf22fb..3975f238818 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -98,7 +98,7 @@ struct AggregateFunctionUniqExactData template struct AggregateFunctionUniqCombinedData { - using Key = T; + using Key = UInt32; using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; @@ -118,30 +118,6 @@ struct AggregateFunctionUniqCombinedData namespace detail { -/** Хэширование 64-битных целочисленных значений в 32-битные. - * Источник: https://gist.github.com/badboy/6267743 - */ -template -struct Hash64To32; - -template -struct Hash64To32::value || std::is_same::value>::type> -{ - static UInt32 compute(T key) - { - using U = typename std::make_unsigned::type; - auto x = static_cast(key); - - x = (~x) + (x << 18); - x = x ^ (x >> 31); - x = x * 21; - x = x ^ (x >> 11); - x = x + (x << 6); - x = x ^ (x >> 22); - return static_cast(x); - } -}; - /** Хэш-функция для uniqCombined. */ template @@ -154,26 +130,33 @@ struct CombinedCardinalityTraits }; template -struct CombinedCardinalityTraits::value || std::is_same::value>::type> +struct CombinedCardinalityTraits::value>::type> { - using Op = Hash64To32; + using U = typename std::make_unsigned::type; static UInt32 hash(T key) { - return Op::compute(key); + return intHash32<0>(static_cast(key)); + }; +}; + +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) + { + return intHash32<0>(key); }; }; template struct CombinedCardinalityTraits::value>::type> { - using Op = Hash64To32; - static UInt32 hash(T key) { UInt64 res = 0; memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return Op::compute(res); + return intHash32<0>(res); } }; diff --git a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference index 288258c7d81..d66effa9fb1 100644 --- a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference @@ -239,25 +239,25 @@ 31 162 35 162 36 162 -0 53988 -1 54083 -3 53994 -6 53948 -7 54209 -9 54112 -10 27000 -11 54058 -13 54158 -14 53926 -17 54094 -19 54127 -20 54065 -21 54207 -22 54056 -26 53982 -31 54156 -35 53960 -36 54076 +0 54226 +1 54034 +3 54016 +6 53982 +7 54076 +9 54218 +10 27075 +11 54093 +13 54108 +14 54096 +17 54294 +19 54070 +20 54028 +21 54170 +22 54106 +26 54103 +31 54050 +35 54130 +36 53868 0.125 1 0.5 1 0.05 1 @@ -291,25 +291,25 @@ 0.043 162 0.037 162 0.071 162 -0.045 54207 -0.125 54209 -0.5 54083 -0.05 54127 -0.143 53948 -0.091 27000 -0.056 54094 -0.048 54065 -0.083 54058 -0.25 53994 -1 53988 -0.1 54112 -0.028 53960 -0.027 54076 -0.031 54156 -0.067 53926 -0.043 54056 -0.037 53982 -0.071 54158 +0.045 54170 +0.125 54076 +0.5 54034 +0.05 54070 +0.143 53982 +0.091 27075 +0.056 54294 +0.048 54028 +0.083 54093 +0.25 54016 +1 54226 +0.1 54218 +0.028 54130 +0.027 53868 +0.031 54050 +0.067 54096 +0.043 54106 +0.037 54103 +0.071 54108 0.5 1 0.05 1 0.25 1 @@ -343,25 +343,25 @@ 0.037 162 0.1 163 1 162 -0.5 54083 -0.05 54127 -0.25 53994 -0.048 54065 -0.091 27000 -0.043 54056 -0.071 54158 -0.083 54058 -0.125 54209 -0.031 54156 -0.143 53948 -0.028 53960 -0.067 53926 -0.045 54207 -0.027 54076 -0.056 54094 -0.037 53982 -0.1 54112 -1 53988 +0.5 54034 +0.05 54070 +0.25 54016 +0.048 54028 +0.091 27075 +0.043 54106 +0.071 54108 +0.083 54093 +0.125 54076 +0.031 54050 +0.143 53982 +0.028 54130 +0.067 54096 +0.045 54170 +0.027 53868 +0.056 54294 +0.037 54103 +0.1 54218 +1 54226 1 1 3 1 6 1 From 5505474f2a1e88a2c6cfad8585e2fd2be3e2bbcd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 00:16:23 +0300 Subject: [PATCH 69/88] dbms: attempt to lower memory reservation when reading String columns with NativeBlockInputStream [#METR-17704]. --- dbms/src/DataTypes/DataTypeString.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 5ae25eb0444..57b718b3ed8 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -137,12 +137,28 @@ void DataTypeString::deserializeBinary(IColumn & column, ReadBuffer & istr, size ColumnString::Chars_t & data = column_string.getChars(); ColumnString::Offsets_t & offsets = column_string.getOffsets(); - /// Выбрано наугад. - constexpr auto avg_value_size_hint_reserve_multiplier = 1.2; + double avg_chars_size; - double avg_chars_size = (avg_value_size_hint && avg_value_size_hint > sizeof(offsets[0]) - ? (avg_value_size_hint - sizeof(offsets[0])) * avg_value_size_hint_reserve_multiplier - : DBMS_APPROX_STRING_SIZE); + if (avg_value_size_hint && avg_value_size_hint > sizeof(offsets[0])) + { + /// Выбрано наугад. + constexpr auto avg_value_size_hint_reserve_multiplier = 1.2; + + avg_chars_size = (avg_value_size_hint - sizeof(offsets[0])) * avg_value_size_hint_reserve_multiplier; + } + else + { + /** Небольшая эвристика для оценки того, что в столбце много пустых строк. + * В этом случае, для экономии оперативки, будем говорить, что средний размер значения маленький. + */ + if (istr.position() + sizeof(UInt32) <= istr.buffer().end() + && *reinterpret_cast(istr.position()) == 0) /// Первые 4 строки находятся в буфере и являются пустыми. + { + avg_chars_size = 1; + } + else + avg_chars_size = DBMS_APPROX_STRING_SIZE; + } data.reserve(data.size() + std::ceil(limit * avg_chars_size)); From cf4a0e85ef996999f3095f2fcafd90f4f73b5bc4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 05:45:40 +0300 Subject: [PATCH 70/88] dbms: lowered memory usage for INSERT [#METR-17704]. --- dbms/include/DB/Interpreters/sortBlock.h | 5 + .../MergeTree/MergedBlockOutputStream.h | 102 +++++++++++------- dbms/src/Interpreters/sortBlock.cpp | 18 +++- .../MergeTree/MergeTreeDataWriter.cpp | 9 +- 4 files changed, 91 insertions(+), 43 deletions(-) diff --git a/dbms/include/DB/Interpreters/sortBlock.h b/dbms/include/DB/Interpreters/sortBlock.h index 4b9982bd603..9026611be5e 100644 --- a/dbms/include/DB/Interpreters/sortBlock.h +++ b/dbms/include/DB/Interpreters/sortBlock.h @@ -18,4 +18,9 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit */ void stableSortBlock(Block & block, const SortDescription & description); +/** То же, что и stableSortBlock, но не сортировать блок, а только рассчитать перестановку значений, + * чтобы потом можно было переставить значения столбцов самостоятельно. + */ +void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation); + } diff --git a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h index aebdaee0143..77572436a37 100644 --- a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h @@ -14,6 +14,8 @@ namespace DB { + + class IMergedBlockOutputStream : public IBlockOutputStream { public: @@ -230,7 +232,9 @@ protected: CompressionMethod compression_method; }; -/** Для записи одного куска. Данные уже отсортированы, относятся к одному месяцу, и пишутся в один кускок. + +/** Для записи одного куска. + * Данные относятся к одному месяцу, и пишутся в один кускок. */ class MergedBlockOutputStream : public IMergedBlockOutputStream { @@ -278,45 +282,18 @@ public: } } + /// Если данные заранее отсортированы. void write(const Block & block) override { - size_t rows = block.rows(); + writeImpl(block, nullptr); + } - /// Сначала пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. - typedef std::vector PrimaryColumns; - PrimaryColumns primary_columns; - - for (const auto & descr : storage.getSortDescription()) - primary_columns.push_back( - !descr.column_name.empty() - ? &block.getByName(descr.column_name) - : &block.getByPosition(descr.column_number)); - - for (size_t i = index_offset; i < rows; i += storage.index_granularity) - { - for (PrimaryColumns::const_iterator it = primary_columns.begin(); it != primary_columns.end(); ++it) - { - if (storage.mode != MergeTreeData::Unsorted) - index_vec.push_back((*(*it)->column)[i]); - - (*it)->type->serializeBinary(index_vec.back(), *index_stream); - } - - ++marks_count; - } - - /// Множество записанных столбцов со смещениями, чтобы не писать общие для вложенных структур столбцы несколько раз - OffsetColumns offset_columns; - - /// Теперь пишем данные. - for (const auto & it : columns_list) - { - const ColumnWithTypeAndName & column = block.getByName(it.name); - writeData(column.name, *column.type, *column.column, offset_columns); - } - - size_t written_for_last_mark = (storage.index_granularity - index_offset + rows) % storage.index_granularity; - index_offset = (storage.index_granularity - written_for_last_mark) % storage.index_granularity; + /** Если данные не отсортированы, но мы заранее вычислили перестановку, после которой они станут сортированными. + * Этот метод используется для экономии оперативки, так как не нужно держать одновременно два блока - исходный и отсортированный. + */ + void writeWithPermutation(const Block & block, const IColumn::Permutation * permutation) + { + writeImpl(block, permutation); } void writeSuffix() override @@ -391,6 +368,57 @@ private: } } + /** Если задана permutation, то переставляет значения в столбцах при записи. + * Это нужно, чтобы не держать целый блок в оперативке для его сортировки. + */ + void writeImpl(const Block & block, const IColumn::Permutation * permutation) + { + size_t rows = block.rows(); + + /// Сначала пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. + typedef std::vector PrimaryColumns; + PrimaryColumns primary_columns; + + for (const auto & descr : storage.getSortDescription()) + primary_columns.push_back( + !descr.column_name.empty() + ? &block.getByName(descr.column_name) + : &block.getByPosition(descr.column_number)); + + for (size_t i = index_offset; i < rows; i += storage.index_granularity) + { + for (PrimaryColumns::const_iterator it = primary_columns.begin(); it != primary_columns.end(); ++it) + { + if (storage.mode != MergeTreeData::Unsorted) + index_vec.push_back((*(*it)->column)[i]); + + (*it)->type->serializeBinary(index_vec.back(), *index_stream); + } + + ++marks_count; + } + + /// Множество записанных столбцов со смещениями, чтобы не писать общие для вложенных структур столбцы несколько раз + OffsetColumns offset_columns; + + /// Теперь пишем данные. + for (const auto & it : columns_list) + { + const ColumnWithTypeAndName & column = block.getByName(it.name); + + if (permutation) + { + ColumnPtr permutted_column = column.column->permute(*permutation, 0); + writeData(column.name, *column.type, *permutted_column, offset_columns); + } + else + writeData(column.name, *column.type, *column.column, offset_columns); + } + + size_t written_for_last_mark = (storage.index_granularity - index_offset + rows) % storage.index_granularity; + index_offset = (storage.index_granularity - written_for_last_mark) % storage.index_granularity; + } + private: NamesAndTypesList columns_list; String part_path; diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index f1b7405bd4f..69e66860317 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -147,15 +147,15 @@ void sortBlock(Block & block, const SortDescription & description, size_t limit) } -void stableSortBlock(Block & block, const SortDescription & description) +void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation) { if (!block) return; size_t size = block.rows(); - IColumn::Permutation perm(size); + out_permutation.resize(size); for (size_t i = 0; i < size; ++i) - perm[i] = i; + out_permutation[i] = i; ColumnsWithSortDescriptions columns_with_sort_desc; @@ -168,7 +168,17 @@ void stableSortBlock(Block & block, const SortDescription & description) columns_with_sort_desc.push_back(std::make_pair(column, description[i])); } - std::stable_sort(perm.begin(), perm.end(), PartialSortingLess(columns_with_sort_desc)); + std::stable_sort(out_permutation.begin(), out_permutation.end(), PartialSortingLess(columns_with_sort_desc)); +} + + +void stableSortBlock(Block & block, const SortDescription & description) +{ + if (!block) + return; + + IColumn::Permutation perm; + stableGetPermutation(block, description, perm); size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 11a7fa0cb5a..6055b799430 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -100,8 +100,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa SortDescription sort_descr = data.getSortDescription(); /// Сортируем. + IColumn::Permutation * perm_ptr = nullptr; + IColumn::Permutation perm; if (data.mode != MergeTreeData::Unsorted) - stableSortBlock(block, sort_descr); + { + stableGetPermutation(block, sort_descr, perm); + perm_ptr = &perm; + } NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames()); MergedBlockOutputStream out(data, part_tmp_path, columns, CompressionMethod::LZ4); @@ -109,7 +114,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa out.getIndex().reserve(part_size * sort_descr.size()); out.writePrefix(); - out.write(block); + out.writeWithPermutation(block, perm_ptr); MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums(); new_data_part->left_date = DayNum_t(min_date); From 3688bca2703ad54d94e98bcef361d02490e7723f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 05:47:07 +0300 Subject: [PATCH 71/88] dbms: addition to prev. revision [#METR-17704]. --- dbms/src/Interpreters/sortBlock.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/sortBlock.cpp b/dbms/src/Interpreters/sortBlock.cpp index 69e66860317..aebc60ff1ea 100644 --- a/dbms/src/Interpreters/sortBlock.cpp +++ b/dbms/src/Interpreters/sortBlock.cpp @@ -161,7 +161,7 @@ void stableGetPermutation(const Block & block, const SortDescription & descripti for (size_t i = 0, size = description.size(); i < size; ++i) { - IColumn * column = !description[i].column_name.empty() + const IColumn * column = !description[i].column_name.empty() ? block.getByName(description[i].column_name).column : block.getByPosition(description[i].column_number).column; From f11f0271e3cb35d3d792399a759f5b3a1e858c81 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 06:27:32 +0300 Subject: [PATCH 72/88] dbms: addition to prev. revision [#METR-17704]. --- .../MergeTree/MergedBlockOutputStream.h | 64 ++++++++++++------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h index 77572436a37..2f10d5ab6de 100644 --- a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h @@ -375,32 +375,26 @@ private: { size_t rows = block.rows(); - /// Сначала пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. - typedef std::vector PrimaryColumns; - PrimaryColumns primary_columns; - - for (const auto & descr : storage.getSortDescription()) - primary_columns.push_back( - !descr.column_name.empty() - ? &block.getByName(descr.column_name) - : &block.getByPosition(descr.column_number)); - - for (size_t i = index_offset; i < rows; i += storage.index_granularity) - { - for (PrimaryColumns::const_iterator it = primary_columns.begin(); it != primary_columns.end(); ++it) - { - if (storage.mode != MergeTreeData::Unsorted) - index_vec.push_back((*(*it)->column)[i]); - - (*it)->type->serializeBinary(index_vec.back(), *index_stream); - } - - ++marks_count; - } - /// Множество записанных столбцов со смещениями, чтобы не писать общие для вложенных структур столбцы несколько раз OffsetColumns offset_columns; + auto sort_description = storage.getSortDescription(); + + /// Сюда будем складывать столбцы, относящиеся к Primary Key, чтобы потом записать индекс. + std::vector primary_columns(sort_description.size()); + std::map primary_columns_name_to_position; + + for (size_t i = 0, size = sort_description.size(); i < size; ++i) + { + const auto & descr = sort_description[i]; + + String name = !descr.column_name.empty() + ? descr.column_name + : block.getByPosition(descr.column_number).name; + + primary_columns_name_to_position[name] = i; + } + /// Теперь пишем данные. for (const auto & it : columns_list) { @@ -410,9 +404,33 @@ private: { ColumnPtr permutted_column = column.column->permute(*permutation, 0); writeData(column.name, *column.type, *permutted_column, offset_columns); + + auto primary_column_it = primary_columns_name_to_position.find(it.name); + if (primary_columns_name_to_position.end() != primary_column_it) + primary_columns[primary_column_it->second] = ColumnWithTypeAndName{permutted_column, it.type, it.name}; } else + { writeData(column.name, *column.type, *column.column, offset_columns); + + auto primary_column_it = primary_columns_name_to_position.find(it.name); + if (primary_columns_name_to_position.end() != primary_column_it) + primary_columns[primary_column_it->second] = column; + } + } + + /// Пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. + for (size_t i = index_offset; i < rows; i += storage.index_granularity) + { + for (auto & primary_column : primary_columns) + { + if (storage.mode != MergeTreeData::Unsorted) + index_vec.push_back((*primary_column.column)[i]); + + primary_column.type->serializeBinary(index_vec.back(), *index_stream); + } + + ++marks_count; } size_t written_for_last_mark = (storage.index_granularity - index_offset + rows) % storage.index_granularity; From 325d73ea3979062676b5b22c935072d45e3d2531 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 06:41:31 +0300 Subject: [PATCH 73/88] dbms: addition to prev. revision [#METR-17704]. --- .../DB/Storages/MergeTree/MergedBlockOutputStream.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h index 2f10d5ab6de..1e74e4cbcd7 100644 --- a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h @@ -393,6 +393,10 @@ private: : block.getByPosition(descr.column_number).name; primary_columns_name_to_position[name] = i; + + primary_columns[i] = !descr.column_name.empty() + ? block.getByName(descr.column_name) + : block.getByPosition(descr.column_number); } /// Теперь пишем данные. @@ -412,17 +416,13 @@ private: else { writeData(column.name, *column.type, *column.column, offset_columns); - - auto primary_column_it = primary_columns_name_to_position.find(it.name); - if (primary_columns_name_to_position.end() != primary_column_it) - primary_columns[primary_column_it->second] = column; } } /// Пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. for (size_t i = index_offset; i < rows; i += storage.index_granularity) { - for (auto & primary_column : primary_columns) + for (const auto & primary_column : primary_columns) { if (storage.mode != MergeTreeData::Unsorted) index_vec.push_back((*primary_column.column)[i]); From 4724de39bdb3e6b61c53c8904b1572b8ad66ec99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 14 Aug 2015 23:18:08 +0300 Subject: [PATCH 74/88] dbms: lowered memory usage for INSERTs (-1 GiB for visits table on every insert) [#METR-17704]. --- dbms/include/DB/IO/HashingWriteBuffer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/IO/HashingWriteBuffer.h b/dbms/include/DB/IO/HashingWriteBuffer.h index 50f26f65e5a..154debf418a 100644 --- a/dbms/include/DB/IO/HashingWriteBuffer.h +++ b/dbms/include/DB/IO/HashingWriteBuffer.h @@ -15,8 +15,8 @@ template class IHashingBuffer : public BufferWithOwnMemory { public: - IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : - block_pos(0), block_size(block_size_), state(0, 0) + IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) + : BufferWithOwnMemory(block_size_), block_pos(0), block_size(block_size_), state(0, 0) { } From 3fbb61cbad1428c887f89a38c83026e58eb94d2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Aug 2015 00:07:17 +0300 Subject: [PATCH 75/88] dbms: fixed error with multiple GLOBAL subqueries [#METR-17622]. --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 18 +++++++++++------- .../00213_multiple_global_in.reference | 1 + .../0_stateless/00213_multiple_global_in.sql | 1 + 3 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00213_multiple_global_in.reference create mode 100644 dbms/tests/queries/0_stateless/00213_multiple_global_in.sql diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 37bd9cc15a3..c02ee22684a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -239,9 +239,10 @@ void ExpressionAnalyzer::analyzeAggregation() void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables() { + /// Преобразует GLOBAL-подзапросы во внешние таблицы; кладёт их в словарь external_tables: name -> StoragePtr. initGlobalSubqueries(ast); - /// Создаёт словарь external_tables: name -> StoragePtr. + /// Добавляет уже существующие внешние таблицы (не подзапросы) в словарь external_tables. findExternalTables(ast); } @@ -896,10 +897,6 @@ static SharedPtr interpretSubquery( void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name) { - /// Сгенерируем имя для внешней таблицы. - while (context.tryGetExternalTable("_data" + toString(external_table_id))) - ++external_table_id; - if (const ASTIdentifier * table = typeid_cast(&*subquery_or_table_name)) { /// Если это уже внешняя таблица, ничего заполять не нужно. Просто запоминаем ее наличие. @@ -910,13 +907,20 @@ void ExpressionAnalyzer::addExternalStorage(ASTPtr & subquery_or_table_name) } } + /// Сгенерируем имя для внешней таблицы. + String external_table_name = "_data" + toString(external_table_id); + while (context.tryGetExternalTable(external_table_name) + || external_tables.count(external_table_name)) + { + ++external_table_id; + external_table_name = "_data" + toString(external_table_id); + } + SharedPtr interpreter = interpretSubquery(subquery_or_table_name, context, subquery_depth + 1); Block sample = interpreter->getSampleBlock(); NamesAndTypesListPtr columns = new NamesAndTypesList(sample.getColumnsList()); - String external_table_name = "_data" + toString(external_table_id); - /** Заменяем подзапрос на имя временной таблицы. * Именно в таком виде, запрос отправится на удалённый сервер. * На удалённый сервер отправится эта временная таблица, и на его стороне, diff --git a/dbms/tests/queries/0_stateless/00213_multiple_global_in.reference b/dbms/tests/queries/0_stateless/00213_multiple_global_in.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00213_multiple_global_in.reference @@ -0,0 +1 @@ +1 1 diff --git a/dbms/tests/queries/0_stateless/00213_multiple_global_in.sql b/dbms/tests/queries/0_stateless/00213_multiple_global_in.sql new file mode 100644 index 00000000000..b93c2bec722 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00213_multiple_global_in.sql @@ -0,0 +1 @@ +SELECT 1 GLOBAL IN (SELECT 1), 2 GLOBAL IN (SELECT 2) FROM remote('127.0.0.2', system.one); From fa8100bd05d09180ee5a678c94395f5bfa24027a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 15 Aug 2015 00:30:48 +0300 Subject: [PATCH 76/88] dbms: improved formatting [#METR-17664]. --- dbms/include/DB/Parsers/ASTJoin.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 859b7b3cfcf..34eaeecd5e5 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -98,17 +98,15 @@ protected: settings.ostr << (strictness == ASTJoin::Any ? "ANY " : "ALL "); settings.ostr << (kind == ASTJoin::Inner ? "INNER " - : (kind == ASTJoin::Left ? "LEFT " - : (kind == ASTJoin::Right ? "RIGHT " - : (kind == ASTJoin::Cross ? "CROSS " - : "FULL OUTER ")))); + : (kind == ASTJoin::Left ? "LEFT " + : (kind == ASTJoin::Right ? "RIGHT " + : (kind == ASTJoin::Cross ? "CROSS " + : "FULL OUTER ")))); settings.ostr << "JOIN " - << (settings.hilite ? hilite_none : ""); + << (settings.hilite ? hilite_none : ""); - FormatStateStacked frame_with_indent = frame; - ++frame_with_indent.indent; - table->formatImpl(settings, state, frame_with_indent); + table->formatImpl(settings, state, frame); if (kind != ASTJoin::Cross) { From 469b409ba0fecbd641c38666dc6c302256aa0f7d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 10:01:41 +0300 Subject: [PATCH 77/88] dbms: added storage StripeLog (incomplete) [#METR-17716]. --- dbms/include/DB/Common/FileChecker.h | 6 +- dbms/include/DB/Core/ErrorCodes.h | 1 + .../DB/DataStreams/MarkInCompressedFile.h | 38 +++ .../DB/DataStreams/NativeBlockInputStream.h | 62 ++++- .../DB/DataStreams/NativeBlockOutputStream.h | 15 +- dbms/include/DB/Storages/MarkCache.h | 25 +- dbms/include/DB/Storages/StorageLog.h | 4 +- dbms/include/DB/Storages/StorageStripeLog.h | 90 +++++++ dbms/include/DB/Storages/StorageTinyLog.h | 4 +- .../DataStreams/NativeBlockInputStream.cpp | 105 +++++++- .../DataStreams/NativeBlockOutputStream.cpp | 46 ++++ dbms/src/Storages/StorageFactory.cpp | 8 + dbms/src/Storages/StorageLog.cpp | 4 +- dbms/src/Storages/StorageStripeLog.cpp | 226 ++++++++++++++++++ dbms/src/Storages/StorageTinyLog.cpp | 9 +- 15 files changed, 592 insertions(+), 51 deletions(-) create mode 100644 dbms/include/DB/DataStreams/MarkInCompressedFile.h create mode 100644 dbms/include/DB/Storages/StorageStripeLog.h create mode 100644 dbms/src/Storages/StorageStripeLog.cpp diff --git a/dbms/include/DB/Common/FileChecker.h b/dbms/include/DB/Common/FileChecker.h index 88e2fa18eab..4e37ef0b16f 100644 --- a/dbms/include/DB/Common/FileChecker.h +++ b/dbms/include/DB/Common/FileChecker.h @@ -16,12 +16,11 @@ namespace DB { /// хранит размеры всех столбцов, и может проверять не побились ли столбцы -template class FileChecker { public: - FileChecker(const std::string &file_info_path_, Storage & storage_) : - files_info_path(file_info_path_), files_info(), storage(storage_), log(&Logger::get("FileChecker")) + FileChecker(const std::string & file_info_path_) : + files_info_path(file_info_path_), files_info(), log(&Logger::get("FileChecker")) { Poco::Path path(files_info_path); tmp_files_info_path = path.parent().toString() + "tmp_" + path.getFileName(); @@ -107,7 +106,6 @@ private: using PropertyTree = boost::property_tree::ptree; PropertyTree files_info; - Storage & storage; Logger * log; }; } diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 3336d80fcf4..c10f878ab4d 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -285,6 +285,7 @@ namespace ErrorCodes ALL_CONNECTION_TRIES_FAILED = 279, NO_AVAILABLE_DATA = 280, DICTIONARY_IS_EMPTY = 281, + INCORRECT_INDEX = 282, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/include/DB/DataStreams/MarkInCompressedFile.h b/dbms/include/DB/DataStreams/MarkInCompressedFile.h new file mode 100644 index 00000000000..ff21cbb8af4 --- /dev/null +++ b/dbms/include/DB/DataStreams/MarkInCompressedFile.h @@ -0,0 +1,38 @@ +#pragma once + +#include + +#include +#include + + +namespace DB +{ + +/** Засечка - позиция в сжатом файле. Сжатый файл состоит из уложенных подряд сжатых блоков. + * Засечка представляют собой пару - смещение в файле до начала сжатого блока, смещение в разжатом блоке до начала данных. + */ +struct MarkInCompressedFile +{ + size_t offset_in_compressed_file; + size_t offset_in_decompressed_block; + + bool operator==(const MarkInCompressedFile & rhs) const + { + return std::tie(offset_in_compressed_file, offset_in_decompressed_block) + == std::tie(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block); + } + bool operator!=(const MarkInCompressedFile & rhs) const + { + return !(*this == rhs); + } + + String toString() const + { + return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")"; + } +}; + +using MarksInCompressedFile = std::vector; + +} diff --git a/dbms/include/DB/DataStreams/NativeBlockInputStream.h b/dbms/include/DB/DataStreams/NativeBlockInputStream.h index c9810b55385..ad725f625a7 100644 --- a/dbms/include/DB/DataStreams/NativeBlockInputStream.h +++ b/dbms/include/DB/DataStreams/NativeBlockInputStream.h @@ -1,22 +1,73 @@ #pragma once #include +#include namespace DB { +class CompressedReadBufferFromFile; + + +/** Формат Native может содержать отдельно расположенный индекс, + * который позволяет понять, где какой столбец расположен, + * и пропускать ненужные столбцы. + */ + +/** Позиция одного кусочка одного столбца. */ +struct IndexOfOneColumnForNativeFormat +{ + String name; + String type; + MarkInCompressedFile location; +}; + +/** Индекс для блока данных. */ +struct IndexOfBlockForNativeFormat +{ + using Columns = std::vector; + + size_t num_columns; + size_t num_rows; + Columns columns; +}; + +/** Весь индекс. */ +struct IndexForNativeFormat +{ + using Blocks = std::vector; + Blocks blocks; + + IndexForNativeFormat() {} + + IndexForNativeFormat(ReadBuffer & istr, const NameSet & required_columns) + { + read(istr, required_columns); + } + + /// Прочитать индекс, только для нужных столбцов. + void read(ReadBuffer & istr, const NameSet & required_columns); +}; + + /** Десериализует поток блоков из родного бинарного формата (с именами и типами столбцов). * Предназначено для взаимодействия между серверами. + * + * Также может использоваться для хранения данных на диске. + * В этом случае, может использовать индекс. */ class NativeBlockInputStream : public IProfilingBlockInputStream { public: /** В случае указания ненулевой server_revision, может ожидаться и считываться дополнительная информация о блоке, * в зависимости от поддерживаемой для указанной ревизии. + * + * index - не обязательный параметр. Если задан, то будут читаться только указанные в индексе кусочки столбцов. */ - NativeBlockInputStream(ReadBuffer & istr_, UInt64 server_revision_ = 0) - : istr(istr_), server_revision(server_revision_) {} + NativeBlockInputStream( + ReadBuffer & istr_, UInt64 server_revision_ = 0, + const IndexForNativeFormat * index_ = nullptr); String getName() const override { return "Native"; } @@ -35,6 +86,13 @@ protected: private: ReadBuffer & istr; UInt64 server_revision; + + const IndexForNativeFormat * index; + IndexForNativeFormat::Blocks::const_iterator index_block_it; + IndexOfBlockForNativeFormat::Columns::const_iterator index_column_it; + + /// Если задан индекс, то istr должен быть CompressedReadBufferFromFile. + CompressedReadBufferFromFile * istr_concrete; }; } diff --git a/dbms/include/DB/DataStreams/NativeBlockOutputStream.h b/dbms/include/DB/DataStreams/NativeBlockOutputStream.h index e0d2c3023f8..b39444f06be 100644 --- a/dbms/include/DB/DataStreams/NativeBlockOutputStream.h +++ b/dbms/include/DB/DataStreams/NativeBlockOutputStream.h @@ -6,8 +6,14 @@ namespace DB { +class WriteBuffer; +class CompressedWriteBuffer; + + /** Сериализует поток блоков в родном бинарном формате (с именами и типами столбцов). * Предназначено для взаимодействия между серверами. + * + * Может быть указан поток для записи индекса. Индекс содержит смещения до каждого кусочка каждого столбца. */ class NativeBlockOutputStream : public IBlockOutputStream { @@ -15,8 +21,9 @@ public: /** В случае указания ненулевой client_revision, может записываться дополнительная информация о блоке, * в зависимости от поддерживаемой для указанной ревизии. */ - NativeBlockOutputStream(WriteBuffer & ostr_, UInt64 client_revision_ = 0) - : ostr(ostr_), client_revision(client_revision_) {} + NativeBlockOutputStream( + WriteBuffer & ostr_, UInt64 client_revision_ = 0, + WriteBuffer * index_ostr_ = nullptr); void write(const Block & block) override; void flush() override { ostr.next(); } @@ -26,6 +33,10 @@ public: private: WriteBuffer & ostr; UInt64 client_revision; + + WriteBuffer * index_ostr; + /// Если требуется записывать индекс, то ostr обязан быть CompressedWriteBuffer. + CompressedWriteBuffer * ostr_concrete = nullptr; }; } diff --git a/dbms/include/DB/Storages/MarkCache.h b/dbms/include/DB/Storages/MarkCache.h index 2bea8630824..d556f9b204e 100644 --- a/dbms/include/DB/Storages/MarkCache.h +++ b/dbms/include/DB/Storages/MarkCache.h @@ -6,34 +6,13 @@ #include #include #include +#include + namespace DB { -struct MarkInCompressedFile -{ - size_t offset_in_compressed_file; - size_t offset_in_decompressed_block; - - bool operator==(const MarkInCompressedFile & rhs) const - { - return std::forward_as_tuple(offset_in_compressed_file, offset_in_decompressed_block) == - std::forward_as_tuple(rhs.offset_in_compressed_file, rhs.offset_in_decompressed_block); - } - bool operator!=(const MarkInCompressedFile & rhs) const - { - return !(*this == rhs); - } - - String toString() const - { - return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")"; - } -}; - -typedef std::vector MarksInCompressedFile; - /// Оценка количества байтов, занимаемых засечками в кеше. struct MarksWeightFunction { diff --git a/dbms/include/DB/Storages/StorageLog.h b/dbms/include/DB/Storages/StorageLog.h index 626b04e6929..5acc06fda80 100644 --- a/dbms/include/DB/Storages/StorageLog.h +++ b/dbms/include/DB/Storages/StorageLog.h @@ -84,8 +84,6 @@ public: }; typedef std::map Files_t; - Files_t & getFiles() { return files; } - bool checkData() const override; protected: @@ -149,7 +147,7 @@ private: size_t max_compress_block_size; protected: - FileChecker file_checker; + FileChecker file_checker; private: /** Для обычных столбцов, в засечках указано количество строчек в блоке. diff --git a/dbms/include/DB/Storages/StorageStripeLog.h b/dbms/include/DB/Storages/StorageStripeLog.h new file mode 100644 index 00000000000..d0217ffd7c2 --- /dev/null +++ b/dbms/include/DB/Storages/StorageStripeLog.h @@ -0,0 +1,90 @@ +#pragma once + +#include +#include + +#include +#include + + +namespace DB +{ + +/** Реализует хранилище, подходящее для маленьких кусочков лога. + * При этом, хранит все столбцы в одном файле формата Native, с расположенным рядом индексом. + */ +class StorageStripeLog : public IStorage +{ +friend class StripeLogBlockInputStream; +friend class StripeLogBlockOutputStream; + +public: + /** Подцепить таблицу с соответствующим именем, по соответствующему пути (с / на конце), + * (корректность имён и путей не проверяется) + * состоящую из указанных столбцов. + * Если не указано attach - создать директорию, если её нет. + */ + static StoragePtr create( + const std::string & path_, + const std::string & name_, + NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + bool attach, + size_t max_compress_block_size_ = DEFAULT_MAX_COMPRESS_BLOCK_SIZE); + + std::string getName() const override { return "StripeLog"; } + std::string getTableName() const override { return name; } + + const NamesAndTypesList & getColumnsListImpl() const override { return *columns; } + + BlockInputStreams read( + const Names & column_names, + ASTPtr query, + const Context & context, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size = DEFAULT_BLOCK_SIZE, + unsigned threads = 1) override; + + BlockOutputStreamPtr write(ASTPtr query) override; + + void drop() override; + + void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; + + bool checkData() const override; + + /// Данные файла. + struct ColumnData + { + Poco::File data_file; + }; + typedef std::map Files_t; + + std::string full_path() { return path + escapeForFileName(name) + '/';} + +private: + String path; + String name; + NamesAndTypesListPtr columns; + + size_t max_compress_block_size; + + FileChecker file_checker; + + Logger * log; + + StorageStripeLog( + const std::string & path_, + const std::string & name_, + NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + bool attach, + size_t max_compress_block_size_); +}; + +} diff --git a/dbms/include/DB/Storages/StorageTinyLog.h b/dbms/include/DB/Storages/StorageTinyLog.h index e0c4ecab489..2a0c31c47a2 100644 --- a/dbms/include/DB/Storages/StorageTinyLog.h +++ b/dbms/include/DB/Storages/StorageTinyLog.h @@ -64,8 +64,6 @@ public: }; typedef std::map Files_t; - Files_t & getFiles(); - std::string full_path() { return path + escapeForFileName(name) + '/';} private: @@ -77,7 +75,7 @@ private: Files_t files; - FileChecker file_checker; + FileChecker file_checker; Logger * log; diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 54ca62c253c..847a6b825f7 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -14,6 +15,23 @@ namespace DB { +NativeBlockInputStream::NativeBlockInputStream( + ReadBuffer & istr_, UInt64 server_revision_, + const IndexForNativeFormat * index_) + : istr(istr_), server_revision(server_revision_), index(index_) +{ + if (index) + { + istr_concrete = typeid_cast(&istr); + if (!istr_concrete) + throw Exception("When need to use index for NativeBlockInputStream, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR); + + index_block_it = index->blocks.begin(); + index_column_it = index_block_it->columns.begin(); + } +} + + void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows) { /** Для массивов требуется сначала десериализовать смещения, а потом значения. @@ -47,9 +65,17 @@ Block NativeBlockInputStream::readImpl() const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - if (istr.eof()) + if (index && index_block_it == index->blocks.end()) return res; + if (istr.eof()) + { + if (index) + throw Exception("Input doesn't contain all data for index.", ErrorCodes::CANNOT_READ_ALL_DATA); + + return res; + } + /// Дополнительная информация о блоке. if (server_revision >= DBMS_MIN_REVISION_WITH_BLOCK_INFO) res.info.read(istr); @@ -57,29 +83,98 @@ Block NativeBlockInputStream::readImpl() /// Размеры size_t columns = 0; size_t rows = 0; - readVarUInt(columns, istr); - readVarUInt(rows, istr); + + if (!index) + { + readVarUInt(columns, istr); + readVarUInt(rows, istr); + } + else + { + columns = index_block_it->num_columns; + rows = index_block_it->num_rows; + } for (size_t i = 0; i < columns; ++i) { + if (index) /// Если текущая позиция какая требуется, то реального seek-а не происходит. + istr_concrete->seek(index_column_it->location.offset_in_compressed_file, index_column_it->location.offset_in_decompressed_block); + ColumnWithTypeAndName column; /// Имя - readStringBinary(column.name, istr); + readBinary(column.name, istr); /// Тип String type_name; - readStringBinary(type_name, istr); + readBinary(type_name, istr); column.type = data_type_factory.get(type_name); + if (index) + { + /// Индекс позволяет сделать проверки. + if (index_column_it->name != column.name) + throw Exception("Index points to column with wrong name: corrupted index or data", ErrorCodes::INCORRECT_INDEX); + if (index_column_it->type != type_name) + throw Exception("Index points to column with wrong type: corrupted index or data", ErrorCodes::INCORRECT_INDEX); + } + /// Данные column.column = column.type->createColumn(); readData(*column.type, *column.column, istr, rows); res.insert(column); + + if (index) + ++index_column_it; + } + + if (index) + { + if (index_column_it != index_block_it->columns.end()) + throw Exception("Inconsistent index: not all columns were read", ErrorCodes::INCORRECT_INDEX); + + ++index_block_it; } return res; } + +void IndexForNativeFormat::read(ReadBuffer & istr, const NameSet & required_columns) +{ + while (!istr.eof()) + { + blocks.emplace_back(); + IndexOfBlockForNativeFormat & block = blocks.back(); + + readVarUInt(block.num_columns, istr); + readVarUInt(block.num_rows, istr); + + if (block.num_columns < required_columns.size()) + throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX); + + for (size_t i = 0; i < block.num_columns; ++i) + { + IndexOfOneColumnForNativeFormat column_index; + + readBinary(column_index.name, istr); + readBinary(column_index.type, istr); + readBinary(column_index.location.offset_in_compressed_file, istr); + readBinary(column_index.location.offset_in_decompressed_block, istr); + + if (required_columns.count(column_index.name)) + block.columns.push_back(std::move(column_index)); + } + + if (block.columns.size() < required_columns.size()) + throw Exception("Index contain less than required columns", ErrorCodes::INCORRECT_INDEX); + if (block.columns.size() > required_columns.size()) + throw Exception("Index contain duplicate columns", ErrorCodes::INCORRECT_INDEX); + + block.num_columns = block.columns.size(); + } +} + + } diff --git a/dbms/src/DataStreams/NativeBlockOutputStream.cpp b/dbms/src/DataStreams/NativeBlockOutputStream.cpp index a85a10a5909..ccc461d6015 100644 --- a/dbms/src/DataStreams/NativeBlockOutputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockOutputStream.cpp @@ -2,12 +2,14 @@ #include #include +#include #include #include #include +#include #include @@ -15,6 +17,21 @@ namespace DB { +NativeBlockOutputStream::NativeBlockOutputStream( + WriteBuffer & ostr_, UInt64 client_revision_, + WriteBuffer * index_ostr_) + : ostr(ostr_), client_revision(client_revision_), + index_ostr(index_ostr_) +{ + if (index_ostr) + { + ostr_concrete = typeid_cast(&ostr); + if (!ostr_concrete) + throw Exception("When need to write index for NativeBlockOutputStream, ostr must be CompressedWriteBuffer.", ErrorCodes::LOGICAL_ERROR); + } +} + + void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr & column, WriteBuffer & ostr, size_t offset, size_t limit) { /** Если есть столбцы-константы - то материализуем их. @@ -71,11 +88,31 @@ void NativeBlockOutputStream::write(const Block & block) /// Размеры size_t columns = block.columns(); size_t rows = block.rows(); + writeVarUInt(columns, ostr); writeVarUInt(rows, ostr); + /** Индекс имеет ту же структуру, что и поток с данными. + * Но вместо значений столбца он содержит засечку, ссылающуюся на место в файле с данными, где находится этот кусочек столбца. + */ + if (index_ostr) + { + writeVarUInt(columns, *index_ostr); + writeVarUInt(rows, *index_ostr); + } + for (size_t i = 0; i < columns; ++i) { + /// Для индекса. + MarkInCompressedFile mark; + + if (index_ostr) + { + ostr_concrete->next(); /// Заканчиваем сжатый блок. + mark.offset_in_compressed_file = ostr_concrete->getCompressedBytes(); + mark.offset_in_decompressed_block = ostr_concrete->getRemainingBytes(); + } + const ColumnWithTypeAndName & column = block.getByPosition(i); /// Имя @@ -86,6 +123,15 @@ void NativeBlockOutputStream::write(const Block & block) /// Данные writeData(*column.type, column.column, ostr, 0, 0); + + if (index_ostr) + { + writeStringBinary(column.name, *index_ostr); + writeStringBinary(column.type->getName(), *index_ostr); + + writeBinary(mark.offset_in_compressed_file, *index_ostr); + writeBinary(mark.offset_in_decompressed_block, *index_ostr); + } } } diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index 8d98faf9540..754354a8fe3 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -177,6 +178,13 @@ StoragePtr StorageFactory::get( materialized_columns, alias_columns, column_defaults, attach, context.getSettings().max_compress_block_size); } + else if (name == "StripeLog") + { + return StorageStripeLog::create( + data_path, table_name, columns, + materialized_columns, alias_columns, column_defaults, + attach, context.getSettings().max_compress_block_size); + } else if (name == "Set") { return StorageSet::create( diff --git a/dbms/src/Storages/StorageLog.cpp b/dbms/src/Storages/StorageLog.cpp index ac1c80ef74a..38c590995ff 100644 --- a/dbms/src/Storages/StorageLog.cpp +++ b/dbms/src/Storages/StorageLog.cpp @@ -129,7 +129,7 @@ private: { Stream(const std::string & data_path, size_t max_compress_block_size) : plain(data_path, max_compress_block_size, O_APPEND | O_CREAT | O_WRONLY), - compressed(plain) + compressed(plain, CompressionMethod::LZ4, max_compress_block_size) { plain_offset = Poco::File(data_path).getSize(); } @@ -463,7 +463,7 @@ StorageLog::StorageLog( : IStorage{materialized_columns_, alias_columns_, column_defaults_}, path(path_), name(name_), columns(columns_), loaded_marks(false), max_compress_block_size(max_compress_block_size_), - file_checker(path + escapeForFileName(name) + '/' + "sizes.json", *this) + file_checker(path + escapeForFileName(name) + '/' + "sizes.json") { if (columns->empty()) throw Exception("Empty list of columns passed to StorageLog constructor", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp new file mode 100644 index 00000000000..ed98c92e9ae --- /dev/null +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -0,0 +1,226 @@ +#include + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include + + +namespace DB +{ + +#define INDEX_BUFFER_SIZE 4096 + + +class StripeLogBlockInputStream : public IProfilingBlockInputStream +{ +public: + StripeLogBlockInputStream(const Names & column_names_, StorageStripeLog & storage_, size_t max_read_buffer_size_) + : column_names(column_names_.begin(), column_names_.end()), storage(storage_), + data_in(storage.full_path() + "data.bin", 0, 0, max_read_buffer_size_), + index_in(storage.full_path() + "index.mrk", 0, 0, INDEX_BUFFER_SIZE), + index(index_in, column_names), + block_in(data_in, 0, &index) + { + } + + String getName() const override { return "StripeLog"; } + + String getID() const override + { + std::stringstream s; + s << "StripeLog"; + for (const auto & name : column_names) + s << ", " << name; /// NOTE Отсутствует эскейпинг. + return s.str(); + } + +protected: + Block readImpl() override + { + return block_in.read(); + } + +private: + NameSet column_names; + StorageStripeLog & storage; + + CompressedReadBufferFromFile data_in; + CompressedReadBufferFromFile index_in; + IndexForNativeFormat index; + NativeBlockInputStream block_in; +}; + + +class StripeLogBlockOutputStream : public IBlockOutputStream +{ +public: + StripeLogBlockOutputStream(StorageStripeLog & storage_) + : storage(storage_), + data_out_compressed(storage.full_path() + "data.bin"), + data_out(data_out_compressed, CompressionMethod::LZ4, storage.max_compress_block_size), + index_out_compressed(storage.full_path() + "index.mrk", INDEX_BUFFER_SIZE), + index_out(index_out_compressed), + block_out(data_out, 0, &index_out) + { + } + + ~StripeLogBlockOutputStream() + { + try + { + writeSuffix(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + void write(const Block & block) override + { + block_out.write(block); + } + + void writeSuffix() override + { + if (done) + return; + + block_out.writeSuffix(); + data_out.next(); + data_out_compressed.next(); + index_out.next(); + index_out_compressed.next(); + + FileChecker::Files files{ data_out_compressed.getFileName(), index_out_compressed.getFileName() }; + storage.file_checker.update(files.begin(), files.end()); + + done = true; + } + +private: + StorageStripeLog & storage; + + WriteBufferFromFile data_out_compressed; + CompressedWriteBuffer data_out; + WriteBufferFromFile index_out_compressed; + CompressedWriteBuffer index_out; + NativeBlockOutputStream block_out; + + bool done = false; +}; + + +StorageStripeLog::StorageStripeLog( + const std::string & path_, + const std::string & name_, + NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + bool attach, + size_t max_compress_block_size_) + : IStorage{materialized_columns_, alias_columns_, column_defaults_}, + path(path_), name(name_), columns(columns_), + max_compress_block_size(max_compress_block_size_), + file_checker(path + escapeForFileName(name) + '/' + "sizes.json"), + log(&Logger::get("StorageStripeLog")) +{ + if (columns->empty()) + throw Exception("Empty list of columns passed to StorageStripeLog constructor", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + + String full_path = path + escapeForFileName(name) + '/'; + if (!attach) + { + /// создаём файлы, если их нет + if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST) + throwFromErrno("Cannot create directory " + full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY); + } +} + +StoragePtr StorageStripeLog::create( + const std::string & path_, + const std::string & name_, + NamesAndTypesListPtr columns_, + const NamesAndTypesList & materialized_columns_, + const NamesAndTypesList & alias_columns_, + const ColumnDefaults & column_defaults_, + bool attach, + size_t max_compress_block_size_) +{ + return (new StorageStripeLog{ + path_, name_, columns_, + materialized_columns_, alias_columns_, column_defaults_, + attach, max_compress_block_size_ + })->thisPtr(); +} + + +void StorageStripeLog::rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) +{ + /// Переименовываем директорию с данными. + Poco::File(path + escapeForFileName(name)).renameTo(new_path_to_db + escapeForFileName(new_table_name)); + + path = new_path_to_db; + name = new_table_name; + file_checker.setPath(path + escapeForFileName(name) + "/" + "sizes.json"); +} + + +BlockInputStreams StorageStripeLog::read( + const Names & column_names, + ASTPtr query, + const Context & context, + const Settings & settings, + QueryProcessingStage::Enum & processed_stage, + const size_t max_block_size, + const unsigned threads) +{ + check(column_names); + processed_stage = QueryProcessingStage::FetchColumns; + return BlockInputStreams(1, new StripeLogBlockInputStream(column_names, *this, settings.max_read_buffer_size)); +} + + +BlockOutputStreamPtr StorageStripeLog::write( + ASTPtr query) +{ + return new StripeLogBlockOutputStream(*this); +} + + +void StorageStripeLog::drop() +{ +} + +bool StorageStripeLog::checkData() const +{ + return file_checker.check(); +} + +} diff --git a/dbms/src/Storages/StorageTinyLog.cpp b/dbms/src/Storages/StorageTinyLog.cpp index 89bc91d6084..2214e8e1f7f 100644 --- a/dbms/src/Storages/StorageTinyLog.cpp +++ b/dbms/src/Storages/StorageTinyLog.cpp @@ -108,7 +108,7 @@ private: { Stream(const std::string & data_path, size_t max_compress_block_size) : plain(data_path, max_compress_block_size, O_APPEND | O_CREAT | O_WRONLY), - compressed(plain) + compressed(plain, CompressionMethod::LZ4, max_compress_block_size) { } @@ -352,7 +352,7 @@ StorageTinyLog::StorageTinyLog( : IStorage{materialized_columns_, alias_columns_, column_defaults_}, path(path_), name(name_), columns(columns_), max_compress_block_size(max_compress_block_size_), - file_checker(path + escapeForFileName(name) + '/' + "sizes.json", *this), + file_checker(path + escapeForFileName(name) + '/' + "sizes.json"), log(&Logger::get("StorageTinyLog")) { if (columns->empty()) @@ -467,9 +467,4 @@ bool StorageTinyLog::checkData() const return file_checker.check(); } -StorageTinyLog::Files_t & StorageTinyLog::getFiles() -{ - return files; -} - } From 5744b77040ef78e7e009fb38b79f10ca62a00822 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 10:30:01 +0300 Subject: [PATCH 78/88] dbms: addition to prev. revision [#METR-17716]. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 847a6b825f7..a2af073c480 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -97,8 +97,11 @@ Block NativeBlockInputStream::readImpl() for (size_t i = 0; i < columns; ++i) { - if (index) /// Если текущая позиция какая требуется, то реального seek-а не происходит. + if (index) + { + /// Если текущая позиция и так какая требуется, то реального seek-а не происходит. istr_concrete->seek(index_column_it->location.offset_in_compressed_file, index_column_it->location.offset_in_decompressed_block); + } ColumnWithTypeAndName column; @@ -135,6 +138,8 @@ Block NativeBlockInputStream::readImpl() throw Exception("Inconsistent index: not all columns were read", ErrorCodes::INCORRECT_INDEX); ++index_block_it; + if (index_block_it != index->blocks.end()) + index_column_it = index_block_it->columns.begin(); } return res; From 5011e4d5810aaf23b9b8bf32e9bbbe4f31c96a53 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 11:18:34 +0300 Subject: [PATCH 79/88] dbms: added StripeLog engine [#METR-17716]. --- .../DB/DataStreams/NativeBlockInputStream.h | 7 ++- dbms/include/DB/Storages/StorageStripeLog.h | 3 +- .../DataStreams/NativeBlockInputStream.cpp | 26 +++++---- dbms/src/Storages/StorageStripeLog.cpp | 57 ++++++++++++++----- 4 files changed, 64 insertions(+), 29 deletions(-) diff --git a/dbms/include/DB/DataStreams/NativeBlockInputStream.h b/dbms/include/DB/DataStreams/NativeBlockInputStream.h index ad725f625a7..f69f0b91329 100644 --- a/dbms/include/DB/DataStreams/NativeBlockInputStream.h +++ b/dbms/include/DB/DataStreams/NativeBlockInputStream.h @@ -67,7 +67,9 @@ public: */ NativeBlockInputStream( ReadBuffer & istr_, UInt64 server_revision_ = 0, - const IndexForNativeFormat * index_ = nullptr); + bool use_index_ = false, + IndexForNativeFormat::Blocks::const_iterator index_block_it_ = IndexForNativeFormat::Blocks::const_iterator{}, + IndexForNativeFormat::Blocks::const_iterator index_block_end_ = IndexForNativeFormat::Blocks::const_iterator{}); String getName() const override { return "Native"; } @@ -87,8 +89,9 @@ private: ReadBuffer & istr; UInt64 server_revision; - const IndexForNativeFormat * index; + bool use_index; IndexForNativeFormat::Blocks::const_iterator index_block_it; + IndexForNativeFormat::Blocks::const_iterator index_block_end; IndexOfBlockForNativeFormat::Columns::const_iterator index_column_it; /// Если задан индекс, то istr должен быть CompressedReadBufferFromFile. diff --git a/dbms/include/DB/Storages/StorageStripeLog.h b/dbms/include/DB/Storages/StorageStripeLog.h index d0217ffd7c2..d12642e0963 100644 --- a/dbms/include/DB/Storages/StorageStripeLog.h +++ b/dbms/include/DB/Storages/StorageStripeLog.h @@ -50,8 +50,6 @@ public: BlockOutputStreamPtr write(ASTPtr query) override; - void drop() override; - void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; bool checkData() const override; @@ -73,6 +71,7 @@ private: size_t max_compress_block_size; FileChecker file_checker; + Poco::RWLock rwlock; Logger * log; diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index a2af073c480..0b21d86a541 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -17,16 +17,18 @@ namespace DB NativeBlockInputStream::NativeBlockInputStream( ReadBuffer & istr_, UInt64 server_revision_, - const IndexForNativeFormat * index_) - : istr(istr_), server_revision(server_revision_), index(index_) + bool use_index_, + IndexForNativeFormat::Blocks::const_iterator index_block_it_, + IndexForNativeFormat::Blocks::const_iterator index_block_end_) + : istr(istr_), server_revision(server_revision_), + use_index(use_index_), index_block_it(index_block_it_), index_block_end(index_block_end_) { - if (index) + if (use_index) { istr_concrete = typeid_cast(&istr); if (!istr_concrete) throw Exception("When need to use index for NativeBlockInputStream, istr must be CompressedReadBufferFromFile.", ErrorCodes::LOGICAL_ERROR); - index_block_it = index->blocks.begin(); index_column_it = index_block_it->columns.begin(); } } @@ -65,12 +67,12 @@ Block NativeBlockInputStream::readImpl() const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - if (index && index_block_it == index->blocks.end()) + if (use_index && index_block_it == index_block_end) return res; if (istr.eof()) { - if (index) + if (use_index) throw Exception("Input doesn't contain all data for index.", ErrorCodes::CANNOT_READ_ALL_DATA); return res; @@ -84,7 +86,7 @@ Block NativeBlockInputStream::readImpl() size_t columns = 0; size_t rows = 0; - if (!index) + if (!use_index) { readVarUInt(columns, istr); readVarUInt(rows, istr); @@ -97,7 +99,7 @@ Block NativeBlockInputStream::readImpl() for (size_t i = 0; i < columns; ++i) { - if (index) + if (use_index) { /// Если текущая позиция и так какая требуется, то реального seek-а не происходит. istr_concrete->seek(index_column_it->location.offset_in_compressed_file, index_column_it->location.offset_in_decompressed_block); @@ -113,7 +115,7 @@ Block NativeBlockInputStream::readImpl() readBinary(type_name, istr); column.type = data_type_factory.get(type_name); - if (index) + if (use_index) { /// Индекс позволяет сделать проверки. if (index_column_it->name != column.name) @@ -128,17 +130,17 @@ Block NativeBlockInputStream::readImpl() res.insert(column); - if (index) + if (use_index) ++index_column_it; } - if (index) + if (use_index) { if (index_column_it != index_block_it->columns.end()) throw Exception("Inconsistent index: not all columns were read", ErrorCodes::INCORRECT_INDEX); ++index_block_it; - if (index_block_it != index->blocks.end()) + if (index_block_it != index_block_end) index_column_it = index_block_it->columns.begin(); } diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index ed98c92e9ae..3659b987fd7 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -39,12 +39,14 @@ namespace DB class StripeLogBlockInputStream : public IProfilingBlockInputStream { public: - StripeLogBlockInputStream(const Names & column_names_, StorageStripeLog & storage_, size_t max_read_buffer_size_) + StripeLogBlockInputStream(const NameSet & column_names_, StorageStripeLog & storage_, size_t max_read_buffer_size_, + const Poco::SharedPtr & index_, + IndexForNativeFormat::Blocks::const_iterator index_begin_, + IndexForNativeFormat::Blocks::const_iterator index_end_) : column_names(column_names_.begin(), column_names_.end()), storage(storage_), + index(index_), index_begin(index_begin_), index_end(index_end_), data_in(storage.full_path() + "data.bin", 0, 0, max_read_buffer_size_), - index_in(storage.full_path() + "index.mrk", 0, 0, INDEX_BUFFER_SIZE), - index(index_in, column_names), - block_in(data_in, 0, &index) + block_in(data_in, 0, true, index_begin, index_end) { } @@ -69,9 +71,11 @@ private: NameSet column_names; StorageStripeLog & storage; + const Poco::SharedPtr index; + IndexForNativeFormat::Blocks::const_iterator index_begin; + IndexForNativeFormat::Blocks::const_iterator index_end; + CompressedReadBufferFromFile data_in; - CompressedReadBufferFromFile index_in; - IndexForNativeFormat index; NativeBlockInputStream block_in; }; @@ -80,7 +84,7 @@ class StripeLogBlockOutputStream : public IBlockOutputStream { public: StripeLogBlockOutputStream(StorageStripeLog & storage_) - : storage(storage_), + : storage(storage_), lock(storage.rwlock), data_out_compressed(storage.full_path() + "data.bin"), data_out(data_out_compressed, CompressionMethod::LZ4, storage.max_compress_block_size), index_out_compressed(storage.full_path() + "index.mrk", INDEX_BUFFER_SIZE), @@ -125,6 +129,7 @@ public: private: StorageStripeLog & storage; + Poco::ScopedWriteRWLock lock; WriteBufferFromFile data_out_compressed; CompressedWriteBuffer data_out; @@ -183,6 +188,8 @@ StoragePtr StorageStripeLog::create( void StorageStripeLog::rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) { + Poco::ScopedWriteRWLock lock(rwlock); + /// Переименовываем директорию с данными. Poco::File(path + escapeForFileName(name)).renameTo(new_path_to_db + escapeForFileName(new_table_name)); @@ -199,11 +206,38 @@ BlockInputStreams StorageStripeLog::read( const Settings & settings, QueryProcessingStage::Enum & processed_stage, const size_t max_block_size, - const unsigned threads) + unsigned threads) { + Poco::ScopedReadRWLock lock(rwlock); + check(column_names); processed_stage = QueryProcessingStage::FetchColumns; - return BlockInputStreams(1, new StripeLogBlockInputStream(column_names, *this, settings.max_read_buffer_size)); + + NameSet column_names_set(column_names.begin(), column_names.end()); + + CompressedReadBufferFromFile index_in(full_path() + "index.mrk", 0, 0, INDEX_BUFFER_SIZE); + Poco::SharedPtr index = new IndexForNativeFormat(index_in, column_names_set); + + BlockInputStreams res; + + size_t size = index->blocks.size(); + if (threads > size) + threads = size; + + for (size_t thread = 0; thread < threads; ++thread) + { + IndexForNativeFormat::Blocks::const_iterator begin = index->blocks.begin(); + IndexForNativeFormat::Blocks::const_iterator end = index->blocks.begin(); + + std::advance(begin, thread * size / threads); + std::advance(end, (thread + 1) * size / threads); + + res.emplace_back(new StripeLogBlockInputStream(column_names_set, *this, settings.max_read_buffer_size, index, begin, end)); + } + + /// Непосредственно во время чтения не держим read lock, потому что мы читаем диапазоны данных, которые не меняются. + + return res; } @@ -214,12 +248,9 @@ BlockOutputStreamPtr StorageStripeLog::write( } -void StorageStripeLog::drop() -{ -} - bool StorageStripeLog::checkData() const { + Poco::ScopedReadRWLock lock(const_cast(rwlock)); return file_checker.check(); } From 38fa9c8982f3878ccdb649c9ae27a59cf6de339e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 11:21:19 +0300 Subject: [PATCH 80/88] dbms: addition to prev. revision [#METR-17716]. --- dbms/src/Interpreters/loadMetadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index 07c4be82267..76652d43a7d 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -41,7 +41,7 @@ static void executeCreateQuery(const String & query, Context & context, const St { if (const auto id = dynamic_cast(ast_create_query.storage.get())) { - if (id->name == "TinyLog") + if (id->name == "TinyLog" || id->name == "StripeLog") { tryLogCurrentException(__PRETTY_FUNCTION__); return; From f0a5ec47367589ae7703739c46fff0631374480a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 16:00:22 +0300 Subject: [PATCH 81/88] dbms: unification; using huge pages (experimental) [#METR-2944]. --- dbms/include/DB/Common/Allocator.h | 124 ++++++++++++++++++ dbms/include/DB/Common/Arena.h | 14 +- .../DB/Common/HashTable/HashTableAllocator.h | 15 ++- dbms/include/DB/Common/PODArray.h | 59 +-------- dbms/include/DB/IO/BufferWithOwnMemory.h | 52 +++----- 5 files changed, 163 insertions(+), 101 deletions(-) create mode 100644 dbms/include/DB/Common/Allocator.h diff --git a/dbms/include/DB/Common/Allocator.h b/dbms/include/DB/Common/Allocator.h new file mode 100644 index 00000000000..524d88f010f --- /dev/null +++ b/dbms/include/DB/Common/Allocator.h @@ -0,0 +1,124 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + + +/** Отвечает за выделение/освобождение памяти. Используется, например, в PODArray, Arena. + * Интерфейс отличается от std::allocator + * - наличием метода realloc, который для больших кусков памяти использует mremap; + * - передачей размера в метод free; + * - наличием аргумента alignment; + */ +class Allocator +{ +private: + /** См. комментарий в HashTableAllocator.h + */ + static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); + static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); + static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; + static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; + +public: + /// Выделить кусок памяти. + void * alloc(size_t size, size_t alignment = 0) + { + if (current_memory_tracker) + current_memory_tracker->alloc(size); + + void * buf; + + if (size >= MMAP_THRESHOLD) + { + if (alignment > MMAP_MIN_ALIGNMENT) + throw DB::Exception("Too large alignment: more than page size.", DB::ErrorCodes::BAD_ARGUMENTS); + + buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno("Allocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + /// См. комментарий в HashTableAllocator.h + if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) + DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + else + { + if (alignment <= MALLOC_MIN_ALIGNMENT) + { + buf = ::malloc(size); + + if (nullptr == buf) + DB::throwFromErrno("Allocator: Cannot malloc.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + else + { + buf = nullptr; + int res = posix_memalign(&buf, alignment, size); + + if (0 != res) + DB::throwFromErrno("Cannot allocate memory (posix_memalign)", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + } + } + + return buf; + } + + /// Освободить память. + void free(void * buf, size_t size) + { + if (size >= MMAP_THRESHOLD) + { + if (0 != munmap(buf, size)) + DB::throwFromErrno("Allocator: Cannot munmap.", DB::ErrorCodes::CANNOT_MUNMAP); + } + else + { + ::free(buf); + } + + if (current_memory_tracker) + current_memory_tracker->free(size); + } + + /** Увеличить размер куска памяти. + * Содержимое старого куска памяти переезжает в начало нового. + * Положение куска памяти может измениться. + */ + void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) + { + if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT) + { + if (current_memory_tracker) + current_memory_tracker->realloc(old_size, new_size); + + buf = ::realloc(buf, new_size); + + if (nullptr == buf) + DB::throwFromErrno("Allocator: Cannot realloc.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) + { + if (current_memory_tracker) + current_memory_tracker->realloc(old_size, new_size); + + buf = mremap(buf, old_size, new_size, MREMAP_MAYMOVE); + if (MAP_FAILED == buf) + DB::throwFromErrno("Allocator: Cannot mremap.", DB::ErrorCodes::CANNOT_MREMAP); + } + else + { + void * new_buf = alloc(new_size, alignment); + memcpy(new_buf, buf, old_size); + free(buf, old_size); + buf = new_buf; + } + + return buf; + } +}; diff --git a/dbms/include/DB/Common/Arena.h b/dbms/include/DB/Common/Arena.h index 5e44bf7df70..a1b0211452e 100644 --- a/dbms/include/DB/Common/Arena.h +++ b/dbms/include/DB/Common/Arena.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB @@ -25,7 +25,7 @@ class Arena { private: /// Непрерывный кусок памяти и указатель на свободное место в нём. Односвязный список. - struct Chunk : private std::allocator /// empty base optimization + struct Chunk : private Allocator /// empty base optimization { char * begin; char * pos; @@ -38,10 +38,7 @@ private: ProfileEvents::increment(ProfileEvents::ArenaAllocChunks); ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_); - if (current_memory_tracker) - current_memory_tracker->alloc(size_); - - begin = allocate(size_); + begin = reinterpret_cast(Allocator::alloc(size_)); pos = begin; end = begin + size_; prev = prev_; @@ -49,10 +46,7 @@ private: ~Chunk() { - deallocate(begin, size()); - - if (current_memory_tracker) - current_memory_tracker->free(size()); + Allocator::free(begin, size()); if (prev) delete prev; diff --git a/dbms/include/DB/Common/HashTable/HashTableAllocator.h b/dbms/include/DB/Common/HashTable/HashTableAllocator.h index e3d9b462c39..5c36857406c 100644 --- a/dbms/include/DB/Common/HashTable/HashTableAllocator.h +++ b/dbms/include/DB/Common/HashTable/HashTableAllocator.h @@ -19,6 +19,7 @@ /** Общая часть разных хэш-таблиц, отвечающая за выделение/освобождение памяти. + * Отличается от Allocator тем, что зануляет память. * Используется в качестве параметра шаблона (есть несколько реализаций с таким же интерфейсом). */ class HashTableAllocator @@ -33,9 +34,9 @@ private: * Рассчитываем, что набор операций mmap/что-то сделать/mremap может выполняться всего лишь около 1000 раз в секунду. * * PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB. - * NOTE Можно попробовать MAP_HUGETLB, но придётся самостоятельно управлять количеством доступных страниц. */ static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); + static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); public: /// Выделить кусок памяти и заполнить его нулями. @@ -52,6 +53,14 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /** Использование huge pages позволяет увеличить производительность более чем в три раза + * в запросе SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k, + * (хэш-таблица на 1 000 000 элементов) + * и примерно на 15% в случае хэш-таблицы на 100 000 000 элементов. + */ + if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) + DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /// Заполнение нулями не нужно - mmap сам это делает. } else @@ -108,6 +117,10 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mremap.", DB::ErrorCodes::CANNOT_MREMAP); + /** Здесь не получается сделать madvise с MADV_HUGEPAGE. + * Похоже, что при mremap, huge pages сами расширяются на новую область. + */ + /// Заполнение нулями не нужно. } else diff --git a/dbms/include/DB/Common/PODArray.h b/dbms/include/DB/Common/PODArray.h index 7d96889483e..7f0ef5758da 100644 --- a/dbms/include/DB/Common/PODArray.h +++ b/dbms/include/DB/Common/PODArray.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -12,7 +11,7 @@ #include #include -#include +#include #include #include @@ -32,28 +31,18 @@ namespace DB * Конструктор по-умолчанию создаёт пустой объект, который не выделяет память. * Затем выделяется память минимум под POD_ARRAY_INITIAL_SIZE элементов. * - * При первом выделении памяти использует std::allocator. - * В реализации из libstdc++ он кэширует куски памяти несколько больше, чем обычный malloc. - * - * При изменении размера, использует realloc, который может (но не обязан) использовать mremap для больших кусков памяти. - * По факту, mremap используется при использовании аллокатора из glibc, но не используется, например, в tcmalloc. - * * Если вставлять элементы push_back-ом, не делая reserve, то PODArray примерно в 2.5 раза быстрее std::vector. */ #define POD_ARRAY_INITIAL_SIZE 4096UL template -class PODArray : private boost::noncopyable, private std::allocator /// empty base optimization +class PODArray : private boost::noncopyable, private Allocator /// empty base optimization { private: - typedef std::allocator Allocator; - char * c_start; char * c_end; char * c_end_of_storage; - bool use_libc_realloc = false; - T * t_start() { return reinterpret_cast(c_start); } T * t_end() { return reinterpret_cast(c_end); } T * t_end_of_storage() { return reinterpret_cast(c_end_of_storage); } @@ -90,10 +79,7 @@ private: size_t bytes_to_alloc = to_size(n); - if (current_memory_tracker) - current_memory_tracker->alloc(bytes_to_alloc); - - c_start = c_end = Allocator::allocate(bytes_to_alloc); + c_start = c_end = reinterpret_cast(Allocator::alloc(bytes_to_alloc)); c_end_of_storage = c_start + bytes_to_alloc; } @@ -102,13 +88,7 @@ private: if (c_start == nullptr) return; - if (use_libc_realloc) - ::free(c_start); - else - Allocator::deallocate(c_start, storage_size()); - - if (current_memory_tracker) - current_memory_tracker->free(storage_size()); + Allocator::free(c_start, storage_size()); } void realloc(size_t n) @@ -122,38 +102,10 @@ private: ptrdiff_t end_diff = c_end - c_start; size_t bytes_to_alloc = to_size(n); - char * old_c_start = c_start; - char * old_c_end_of_storage = c_end_of_storage; - - if (current_memory_tracker) - current_memory_tracker->realloc(storage_size(), bytes_to_alloc); - - if (use_libc_realloc) - { - auto new_c_start = reinterpret_cast(::realloc(c_start, bytes_to_alloc)); - - if (nullptr == new_c_start) - throwFromErrno("PODArray: cannot realloc", ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - c_start = new_c_start; - } - else - { - auto new_c_start = reinterpret_cast(malloc(bytes_to_alloc)); - - if (nullptr == new_c_start) - throwFromErrno("PODArray: cannot realloc", ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - c_start = new_c_start; - - memcpy(c_start, old_c_start, std::min(bytes_to_alloc, static_cast(end_diff))); - Allocator::deallocate(old_c_start, old_c_end_of_storage - old_c_start); - } + c_start = reinterpret_cast(Allocator::realloc(c_start, storage_size(), bytes_to_alloc)); c_end = c_start + end_diff; c_end_of_storage = c_start + bytes_to_alloc; - - use_libc_realloc = true; } public: @@ -187,7 +139,6 @@ public: std::swap(c_start, other.c_start); std::swap(c_end, other.c_end); std::swap(c_end_of_storage, other.c_end_of_storage); - std::swap(use_libc_realloc, other.use_libc_realloc); return *this; } diff --git a/dbms/include/DB/IO/BufferWithOwnMemory.h b/dbms/include/DB/IO/BufferWithOwnMemory.h index 9c63e592e94..96aa8728428 100644 --- a/dbms/include/DB/IO/BufferWithOwnMemory.h +++ b/dbms/include/DB/IO/BufferWithOwnMemory.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include @@ -18,7 +18,7 @@ namespace DB * Отличается тем, что не делает лишний memset. (И почти ничего не делает.) * Также можно попросить выделять выровненный кусок памяти. */ -struct Memory : boost::noncopyable +struct Memory : boost::noncopyable, Allocator { size_t m_capacity = 0; size_t m_size = 0; @@ -66,16 +66,22 @@ struct Memory : boost::noncopyable } else { - dealloc(); - + new_size = align(new_size); + m_data = reinterpret_cast(Allocator::realloc(m_data, m_capacity, new_size, alignment)); m_capacity = new_size; m_size = m_capacity; - - alloc(); } } private: + size_t align(size_t value) const + { + if (!alignment) + return value; + + return (value + alignment - 1) / alignment * alignment; + } + void alloc() { if (!m_capacity) @@ -87,33 +93,10 @@ private: ProfileEvents::increment(ProfileEvents::IOBufferAllocs); ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, m_capacity); - if (current_memory_tracker) - current_memory_tracker->alloc(m_capacity); - - char * new_m_data = nullptr; - - if (!alignment) - { - new_m_data = reinterpret_cast(malloc(m_capacity)); - - if (!new_m_data) - throw Exception("Cannot allocate memory (malloc)", ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - m_data = new_m_data; - - return; - } - - size_t aligned_capacity = (m_capacity + alignment - 1) / alignment * alignment; - m_capacity = aligned_capacity; + size_t new_capacity = align(m_capacity); + m_data = reinterpret_cast(Allocator::alloc(new_capacity, alignment)); + m_capacity = new_capacity; m_size = m_capacity; - - int res = posix_memalign(reinterpret_cast(&new_m_data), alignment, m_capacity); - - if (0 != res) - DB::throwFromErrno("Cannot allocate memory (posix_memalign)", ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); - - m_data = new_m_data; } void dealloc() @@ -121,11 +104,8 @@ private: if (!m_data) return; - free(reinterpret_cast(m_data)); + Allocator::free(reinterpret_cast(m_data), m_capacity); m_data = nullptr; /// Чтобы избежать double free, если последующий вызов alloc кинет исключение. - - if (current_memory_tracker) - current_memory_tracker->free(m_capacity); } }; From f68478b63897fbb9214597158b5ba0f203abef32 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 17:44:02 +0300 Subject: [PATCH 82/88] dbms: using huge pages everywhere (experimental) [#METR-2944]. --- dbms/include/DB/Common/Allocator.h | 5 ----- .../DB/Common/HashTable/HashTableAllocator.h | 13 ------------- dbms/src/Server/Server.cpp | 19 +++++++++++++++++++ 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/Common/Allocator.h b/dbms/include/DB/Common/Allocator.h index 524d88f010f..95f805f5a1d 100644 --- a/dbms/include/DB/Common/Allocator.h +++ b/dbms/include/DB/Common/Allocator.h @@ -21,7 +21,6 @@ private: /** См. комментарий в HashTableAllocator.h */ static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); - static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; @@ -42,10 +41,6 @@ public: buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == buf) DB::throwFromErrno("Allocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - /// См. комментарий в HashTableAllocator.h - if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) - DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); } else { diff --git a/dbms/include/DB/Common/HashTable/HashTableAllocator.h b/dbms/include/DB/Common/HashTable/HashTableAllocator.h index 5c36857406c..685acbd9b28 100644 --- a/dbms/include/DB/Common/HashTable/HashTableAllocator.h +++ b/dbms/include/DB/Common/HashTable/HashTableAllocator.h @@ -36,7 +36,6 @@ private: * PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB. */ static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); - static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); public: /// Выделить кусок памяти и заполнить его нулями. @@ -53,14 +52,6 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - /** Использование huge pages позволяет увеличить производительность более чем в три раза - * в запросе SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k, - * (хэш-таблица на 1 000 000 элементов) - * и примерно на 15% в случае хэш-таблицы на 100 000 000 элементов. - */ - if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) - DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - /// Заполнение нулями не нужно - mmap сам это делает. } else @@ -117,10 +108,6 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mremap.", DB::ErrorCodes::CANNOT_MREMAP); - /** Здесь не получается сделать madvise с MADV_HUGEPAGE. - * Похоже, что при mremap, huge pages сами расширяются на новую область. - */ - /// Заполнение нулями не нужно. } else diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index a2bc499db88..4760aadf95c 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -44,6 +44,8 @@ #include +#include + #include "Server.h" #include "HTTPHandler.h" #include "InterserverIOHTTPHandler.h" @@ -424,6 +426,23 @@ int Server::main(const std::vector & args) { Logger * log = &logger(); + /** Использование huge pages позволяет увеличить производительность более чем в три раза + * в запросе SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k, + * (хэш-таблица на 1 000 000 элементов) + * и примерно на 15% в случае хэш-таблицы на 100 000 000 элементов. + */ + if (!MallocHook::AddMmapHook([](const void * result, const void * start, size_t size, int protection, int flags, int fd, off_t offset) + { + const auto HUGE_PAGE_SIZE = 1 << 21; + + if (result != MAP_FAILED + && size >= HUGE_PAGE_SIZE + && (flags & MAP_PRIVATE) + && (flags & MAP_ANONYMOUS)) + (void)madvise(const_cast(result), size, MADV_HUGEPAGE); + })) + LOG_WARNING(log, "Cannot set mmap hook."); + std::string path = config().getString("path"); Poco::trimInPlace(path); if (path.empty()) From 3d2a7669646f0e2cafd583c862d0d0e9ea56a3c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 16 Aug 2015 18:11:06 +0300 Subject: [PATCH 83/88] Revert "dbms: using huge pages everywhere (experimental) [#METR-2944]." This reverts commit a127a866704f95ef31684ef042d0765f7202677a. --- dbms/include/DB/Common/Allocator.h | 5 +++++ .../DB/Common/HashTable/HashTableAllocator.h | 13 +++++++++++++ dbms/src/Server/Server.cpp | 19 ------------------- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/dbms/include/DB/Common/Allocator.h b/dbms/include/DB/Common/Allocator.h index 95f805f5a1d..524d88f010f 100644 --- a/dbms/include/DB/Common/Allocator.h +++ b/dbms/include/DB/Common/Allocator.h @@ -21,6 +21,7 @@ private: /** См. комментарий в HashTableAllocator.h */ static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); + static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); static constexpr size_t MMAP_MIN_ALIGNMENT = 4096; static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; @@ -41,6 +42,10 @@ public: buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (MAP_FAILED == buf) DB::throwFromErrno("Allocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + /// См. комментарий в HashTableAllocator.h + if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) + DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); } else { diff --git a/dbms/include/DB/Common/HashTable/HashTableAllocator.h b/dbms/include/DB/Common/HashTable/HashTableAllocator.h index 685acbd9b28..5c36857406c 100644 --- a/dbms/include/DB/Common/HashTable/HashTableAllocator.h +++ b/dbms/include/DB/Common/HashTable/HashTableAllocator.h @@ -36,6 +36,7 @@ private: * PS. Также это требуется, потому что tcmalloc не может выделить кусок памяти больше 16 GB. */ static constexpr size_t MMAP_THRESHOLD = 64 * (1 << 20); + static constexpr size_t HUGE_PAGE_SIZE = 2 * (1 << 20); public: /// Выделить кусок памяти и заполнить его нулями. @@ -52,6 +53,14 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mmap.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /** Использование huge pages позволяет увеличить производительность более чем в три раза + * в запросе SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k, + * (хэш-таблица на 1 000 000 элементов) + * и примерно на 15% в случае хэш-таблицы на 100 000 000 элементов. + */ + if (size >= HUGE_PAGE_SIZE && 0 != madvise(buf, size, MADV_HUGEPAGE)) + DB::throwFromErrno("HashTableAllocator: Cannot madvise with MADV_HUGEPAGE.", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /// Заполнение нулями не нужно - mmap сам это делает. } else @@ -108,6 +117,10 @@ public: if (MAP_FAILED == buf) DB::throwFromErrno("HashTableAllocator: Cannot mremap.", DB::ErrorCodes::CANNOT_MREMAP); + /** Здесь не получается сделать madvise с MADV_HUGEPAGE. + * Похоже, что при mremap, huge pages сами расширяются на новую область. + */ + /// Заполнение нулями не нужно. } else diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 4760aadf95c..a2bc499db88 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -44,8 +44,6 @@ #include -#include - #include "Server.h" #include "HTTPHandler.h" #include "InterserverIOHTTPHandler.h" @@ -426,23 +424,6 @@ int Server::main(const std::vector & args) { Logger * log = &logger(); - /** Использование huge pages позволяет увеличить производительность более чем в три раза - * в запросе SELECT number % 1000000 AS k, count() FROM system.numbers GROUP BY k, - * (хэш-таблица на 1 000 000 элементов) - * и примерно на 15% в случае хэш-таблицы на 100 000 000 элементов. - */ - if (!MallocHook::AddMmapHook([](const void * result, const void * start, size_t size, int protection, int flags, int fd, off_t offset) - { - const auto HUGE_PAGE_SIZE = 1 << 21; - - if (result != MAP_FAILED - && size >= HUGE_PAGE_SIZE - && (flags & MAP_PRIVATE) - && (flags & MAP_ANONYMOUS)) - (void)madvise(const_cast(result), size, MADV_HUGEPAGE); - })) - LOG_WARNING(log, "Cannot set mmap hook."); - std::string path = config().getString("path"); Poco::trimInPlace(path); if (path.empty()) From f1655cc62f80991ae05ef914613ec6e4cdcb3eab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Aug 2015 00:09:36 +0300 Subject: [PATCH 84/88] Merge --- .../DB/Storages/MergeTree/ActiveDataPartSet.h | 18 ++++++-------- .../MergeTree/MergeTreeBlockOutputStream.h | 2 +- .../DB/Storages/MergeTree/MergeTreeData.h | 2 +- .../Storages/MergeTree/MergeTreeDataWriter.h | 2 +- .../ReplicatedMergeTreeBlockOutputStream.h | 2 +- .../DB/Storages/StorageReplicatedMergeTree.h | 7 ++++-- .../Storages/MergeTree/ActiveDataPartSet.cpp | 17 ++++++++----- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 15 ++++++------ .../MergeTree/MergeTreeDataMerger.cpp | 16 ++++--------- .../MergeTree/MergeTreeDataWriter.cpp | 11 ++++++--- dbms/src/Storages/StorageMergeTree.cpp | 2 +- .../Storages/StorageReplicatedMergeTree.cpp | 24 +++++++++---------- dbms/src/Storages/StorageSystemParts.cpp | 14 +++++------ 13 files changed, 65 insertions(+), 67 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h b/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h index 3cfdc8d8942..d80556903d2 100644 --- a/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h +++ b/dbms/include/DB/Storages/MergeTree/ActiveDataPartSet.h @@ -23,19 +23,16 @@ public: { DayNum_t left_date; DayNum_t right_date; - UInt64 left; - UInt64 right; + Int64 left; + Int64 right; UInt32 level; std::string name; - DayNum_t left_month; - DayNum_t right_month; + DayNum_t month; bool operator<(const Part & rhs) const { - if (left_month != rhs.left_month) - return left_month < rhs.left_month; - if (right_month != rhs.right_month) - return right_month < rhs.right_month; + if (month != rhs.month) + return month < rhs.month; if (left != rhs.left) return left < rhs.left; @@ -48,8 +45,7 @@ public: /// Содержит другой кусок (получен после объединения другого куска с каким-то ещё) bool contains(const Part & rhs) const { - return left_month == rhs.left_month /// Куски за разные месяцы не объединяются - && right_month == rhs.right_month + return month == rhs.month /// Куски за разные месяцы не объединяются && left_date <= rhs.left_date && right_date >= rhs.right_date && left <= rhs.left @@ -66,7 +62,7 @@ public: size_t size() const; - static String getPartName(DayNum_t left_date, DayNum_t right_date, UInt64 left_id, UInt64 right_id, UInt64 level); + static String getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level); /// Возвращает true если имя директории совпадает с форматом имени директории кусочков static bool isPartDirectory(const String & dir_name, Poco::RegularExpression::MatchVec * out_matches = nullptr); diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockOutputStream.h index 80d3788eafc..d7a163d64ca 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockOutputStream.h @@ -33,7 +33,7 @@ public: auto part_blocks = storage.writer.splitBlockIntoParts(block); for (auto & current_block : part_blocks) { - UInt64 temp_index = storage.increment.get(); + Int64 temp_index = storage.increment.get(); MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, temp_index); storage.data.renameTempPartAndAdd(part, &storage.increment); diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeData.h b/dbms/include/DB/Storages/MergeTree/MergeTreeData.h index 47153ecc8f9..06d7f101b80 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeData.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeData.h @@ -625,7 +625,7 @@ public: || mode == Mode::Aggregating; } - UInt64 getMaxDataPartIndex(); + Int64 getMaxDataPartIndex(); std::string getTableName() const override { diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeDataWriter.h b/dbms/include/DB/Storages/MergeTree/MergeTreeDataWriter.h index 91314c92edb..c520e0349d5 100644 --- a/dbms/include/DB/Storages/MergeTree/MergeTreeDataWriter.h +++ b/dbms/include/DB/Storages/MergeTree/MergeTreeDataWriter.h @@ -43,7 +43,7 @@ public: * temp_index - значение left и right для нового куска. Можно будет изменить при переименовании. * Возвращает кусок с именем, начинающимся с tmp_, еще не добавленный в MergeTreeData. */ - MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithDateInterval & block, UInt64 temp_index); + MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithDateInterval & block, Int64 temp_index); private: MergeTreeData & data; diff --git a/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h index 72aee9e0e62..cbac9398b36 100644 --- a/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h @@ -38,7 +38,7 @@ public: AbandonableLockInZooKeeper block_number_lock = storage.allocateBlockNumber(month_name); - UInt64 part_number = block_number_lock.getNumber(); + Int64 part_number = block_number_lock.getNumber(); MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, part_number); String part_name = ActiveDataPartSet::getPartName(part->left_date, part->right_date, part->left, part->right, part->level); diff --git a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h index db85fdede2c..225945fa3a3 100644 --- a/dbms/include/DB/Storages/StorageReplicatedMergeTree.h +++ b/dbms/include/DB/Storages/StorageReplicatedMergeTree.h @@ -393,8 +393,11 @@ private: */ void waitForReplicaToProcessLogEntry(const String & replica_name, const LogEntry & entry); - /// Преобразовать число в строку формате суффиксов автоинкрементных нод в ZooKeeper. - static String padIndex(UInt64 index) + /** Преобразовать число в строку формате суффиксов автоинкрементных нод в ZooKeeper. + * Поддерживаются также отрицательные числа - для них имя ноды выглядит несколько глупо + * и не соответствует никакой автоинкрементной ноде в ZK. + */ + static String padIndex(Int64 index) { String index_str = toString(index); return std::string(10 - index_str.size(), '0') + index_str; diff --git a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp index cadd0778888..cb2a091d6d5 100644 --- a/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/dbms/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -103,7 +103,7 @@ size_t ActiveDataPartSet::size() const } -String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, UInt64 left_id, UInt64 right_id, UInt64 level) +String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, Int64 left_id, Int64 right_id, UInt64 level) { const auto & date_lut = DateLUT::instance(); @@ -133,7 +133,7 @@ String ActiveDataPartSet::getPartName(DayNum_t left_date, DayNum_t right_date, U bool ActiveDataPartSet::isPartDirectory(const String & dir_name, Poco::RegularExpression::MatchVec * out_matches) { Poco::RegularExpression::MatchVec matches; - static Poco::RegularExpression file_name_regexp("^(\\d{8})_(\\d{8})_(\\d+)_(\\d+)_(\\d+)"); + static Poco::RegularExpression file_name_regexp("^(\\d{8})_(\\d{8})_(-?\\d+)_(-?\\d+)_(\\d+)"); bool res = (file_name_regexp.match(dir_name, 0, matches) && 6 == matches.size()); if (out_matches) *out_matches = matches; @@ -157,12 +157,17 @@ void ActiveDataPartSet::parsePartName(const String & file_name, Part & part, con part.left_date = date_lut.YYYYMMDDToDayNum(parse(file_name.substr(matches[1].offset, matches[1].length))); part.right_date = date_lut.YYYYMMDDToDayNum(parse(file_name.substr(matches[2].offset, matches[2].length))); - part.left = parse(file_name.substr(matches[3].offset, matches[3].length)); - part.right = parse(file_name.substr(matches[4].offset, matches[4].length)); + part.left = parse(file_name.substr(matches[3].offset, matches[3].length)); + part.right = parse(file_name.substr(matches[4].offset, matches[4].length)); part.level = parse(file_name.substr(matches[5].offset, matches[5].length)); - part.left_month = date_lut.toFirstDayNumOfMonth(part.left_date); - part.right_month = date_lut.toFirstDayNumOfMonth(part.right_date); + DayNum_t left_month = date_lut.toFirstDayNumOfMonth(part.left_date); + DayNum_t right_month = date_lut.toFirstDayNumOfMonth(part.right_date); + + if (left_month != right_month) + throw Exception("Part name " + file_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); + + part.month = left_month; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 11cb7d298d8..5f21b4e775e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -112,9 +112,9 @@ MergeTreeData::MergeTreeData( throw Exception("Primary key could be empty only for UnsortedMergeTree", ErrorCodes::BAD_ARGUMENTS); } -UInt64 MergeTreeData::getMaxDataPartIndex() +Int64 MergeTreeData::getMaxDataPartIndex() { - UInt64 max_part_id = 0; + Int64 max_part_id = 0; for (const auto & part : data_parts) max_part_id = std::max(max_part_id, part->right); @@ -264,9 +264,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) while (curr_jt != data_parts.end()) { /// Куски данных за разные месяцы рассматривать не будем - if ((*curr_jt)->left_month != (*curr_jt)->right_month - || (*curr_jt)->right_month != (*prev_jt)->left_month - || (*prev_jt)->left_month != (*prev_jt)->right_month) + if ((*curr_jt)->month != (*prev_jt)->month) { ++prev_jt; ++curr_jt; @@ -734,6 +732,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( bool obsolete = false; /// Покрыт ли part каким-нибудь куском. DataPartsVector res; + /// Куски, содержащиеся в part, идут в data_parts подряд, задевая место, куда вставился бы сам part. DataParts::iterator it = data_parts.lower_bound(part); /// Пойдем влево. @@ -841,7 +840,7 @@ void MergeTreeData::renameAndDetachPart(const DataPartPtr & part, const String & Strings restored; bool error = false; - UInt64 pos = part->left; + Int64 pos = part->left; if (it != all_data_parts.begin()) { @@ -934,13 +933,13 @@ size_t MergeTreeData::getMaxPartsCountForMonth() for (const auto & part : data_parts) { - if (part->left_month == cur_month) + if (part->month == cur_month) { ++cur_count; } else { - cur_month = part->left_month; + cur_month = part->month; cur_count = 1; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp index 6d47840c076..f69f51be70f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMerger.cpp @@ -110,13 +110,6 @@ bool MergeTreeDataMerger::selectPartsToMerge(MergeTreeData::DataPartsVector & pa continue; } - /// Кусок в одном месяце. - if (first_part->left_month != first_part->right_month) - { - LOG_WARNING(log, "Part " << first_part->name << " spans more than one month"); - continue; - } - /// Самый длинный валидный отрезок, начинающийся здесь. size_t cur_longest_max = -1U; size_t cur_longest_min = -1U; @@ -128,8 +121,8 @@ bool MergeTreeDataMerger::selectPartsToMerge(MergeTreeData::DataPartsVector & pa size_t cur_sum = first_part->size_in_bytes; int cur_len = 1; - DayNum_t month = first_part->left_month; - UInt64 cur_id = first_part->right; + DayNum_t month = first_part->month; + Int64 cur_id = first_part->right; /// Этот месяц кончился хотя бы день назад. bool is_old_month = now_day - now_month >= 1 && now_month > month; @@ -151,9 +144,8 @@ bool MergeTreeDataMerger::selectPartsToMerge(MergeTreeData::DataPartsVector & pa const MergeTreeData::DataPartPtr & last_part = *jt; /// Кусок разрешено сливать с предыдущим, и в одном правильном месяце. - if (last_part->left_month != last_part->right_month || - last_part->left_month != month || - !can_merge(prev_part, last_part)) + if (last_part->month != month + || !can_merge(prev_part, last_part)) { break; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 6055b799430..87e0952c6b9 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -71,7 +71,7 @@ BlocksWithDateIntervals MergeTreeDataWriter::splitBlockIntoParts(const Block & b return res; } -MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDateInterval & block_with_dates, UInt64 temp_index) +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDateInterval & block_with_dates, Int64 temp_index) { Block & block = block_with_dates.block; UInt16 min_date = block_with_dates.min_date; @@ -79,6 +79,12 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa const auto & date_lut = DateLUT::instance(); + DayNum_t min_month = date_lut.toFirstDayNumOfMonth(DayNum_t(min_date)); + DayNum_t max_month = date_lut.toFirstDayNumOfMonth(DayNum_t(max_date)); + + if (min_month != max_month) + throw Exception("Logical error: part spans more than one month."); + size_t part_size = (block.rows() + data.index_granularity - 1) / data.index_granularity; String tmp_part_name = "tmp_" + ActiveDataPartSet::getPartName( @@ -124,8 +130,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDa new_data_part->level = 0; new_data_part->size = part_size; new_data_part->modification_time = time(0); - new_data_part->left_month = date_lut.toFirstDayNumOfMonth(new_data_part->left_date); - new_data_part->right_month = date_lut.toFirstDayNumOfMonth(new_data_part->right_date); + new_data_part->month = min_month; new_data_part->columns = columns; new_data_part->checksums = checksums; new_data_part->index.swap(out.getIndex()); diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 2b0a872d7ce..49123d7017d 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -297,7 +297,7 @@ void StorageMergeTree::dropPartition(const Field & partition, bool detach, bool for (const auto & part : parts) { - if (!(part->left_month == part->right_month && part->left_month == month)) + if (part->month != month) continue; LOG_DEBUG(log, "Removing part " << part->name); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 67653e5f588..a143094161f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -26,7 +26,7 @@ namespace DB const auto ERROR_SLEEP_MS = 1000; const auto MERGE_SELECTING_SLEEP_MS = 5 * 1000; -const auto RESERVED_BLOCK_NUMBERS = 200; +const Int64 RESERVED_BLOCK_NUMBERS = 200; StorageReplicatedMergeTree::StorageReplicatedMergeTree( @@ -1272,7 +1272,8 @@ void StorageReplicatedMergeTree::mergeSelectingThread() auto zookeeper = getZooKeeper(); /// Можно слить куски, если все номера между ними заброшены - не соответствуют никаким блокам. - for (UInt64 number = left->right + 1; number <= right->left - 1; ++number) /// Номера блоков больше нуля. + /// Номера до RESERVED_BLOCK_NUMBERS всегда не соответствуют никаким блокам. + for (Int64 number = std::max(RESERVED_BLOCK_NUMBERS, left->right + 1); number <= right->left - 1; ++number) { String path1 = zookeeper_path + "/block_numbers/" + month_name + "/block-" + padIndex(number); String path2 = zookeeper_path + "/nonincrement_block_numbers/" + month_name + "/block-" + padIndex(number); @@ -1402,7 +1403,7 @@ void StorageReplicatedMergeTree::mergeSelectingThread() for (size_t i = 0; i + 1 < parts.size(); ++i) { /// Уберем больше не нужные отметки о несуществующих блоках. - for (UInt64 number = parts[i]->right + 1; number <= parts[i + 1]->left - 1; ++number) + for (Int64 number = std::max(RESERVED_BLOCK_NUMBERS, parts[i]->right + 1); number <= parts[i + 1]->left - 1; ++number) { zookeeper->tryRemove(zookeeper_path + "/block_numbers/" + month_name + "/block-" + padIndex(number)); zookeeper->tryRemove(zookeeper_path + "/nonincrement_block_numbers/" + month_name + "/block-" + padIndex(number)); @@ -2241,7 +2242,7 @@ void StorageReplicatedMergeTree::dropUnreplicatedPartition(const Field & partiti for (const auto & part : parts) { - if (!(part->left_month == part->right_month && part->left_month == month)) + if (part->month != month) continue; LOG_DEBUG(log, "Removing unreplicated part " << part->name); @@ -2279,7 +2280,7 @@ void StorageReplicatedMergeTree::dropPartition(const Field & field, bool detach, * NOTE: Если понадобится аналогично поддержать запрос DROP PART, для него придется придумать какой-нибудь новый механизм, * чтобы гарантировать этот инвариант. */ - UInt64 right; + Int64 right; { AbandonableLockInZooKeeper block_number_lock = allocateBlockNumber(month_name); @@ -2329,7 +2330,7 @@ void StorageReplicatedMergeTree::attachPartition(const Field & field, bool unrep String partition; if (attach_part) - partition = field.getType() == Field::Types::UInt64 ? toString(field.get()) : field.safeGet(); + partition = field.safeGet(); else partition = MergeTreeData::getMonthName(field); @@ -2369,19 +2370,16 @@ void StorageReplicatedMergeTree::attachPartition(const Field & field, bool unrep /// Выделим добавляемым кускам максимальные свободные номера, меньшие RESERVED_BLOCK_NUMBERS. /// NOTE: Проверка свободности номеров никак не синхронизируется. Выполнять несколько запросов ATTACH/DETACH/DROP одновременно нельзя. - UInt64 min_used_number = RESERVED_BLOCK_NUMBERS; + Int64 min_used_number = RESERVED_BLOCK_NUMBERS; + DayNum_t month = DateLUT::instance().makeDayNum(parse(partition.substr(0, 4)), parse(partition.substr(4, 2)), 0); { - /// TODO Это необходимо лишь в пределах одного месяца. auto existing_parts = data.getDataParts(); for (const auto & part : existing_parts) - min_used_number = std::min(min_used_number, part->left); + if (part->month == month) + min_used_number = std::min(min_used_number, part->left); } - if (parts.size() > min_used_number) - throw Exception("Not enough free small block numbers for attaching parts: " - + toString(parts.size()) + " needed, " + toString(min_used_number) + " available", ErrorCodes::NOT_ENOUGH_BLOCK_NUMBERS); - /// Добавим записи в лог. std::reverse(parts.begin(), parts.end()); std::list entries; diff --git a/dbms/src/Storages/StorageSystemParts.cpp b/dbms/src/Storages/StorageSystemParts.cpp index 67f6170dc0b..558f6a90564 100644 --- a/dbms/src/Storages/StorageSystemParts.cpp +++ b/dbms/src/Storages/StorageSystemParts.cpp @@ -29,8 +29,8 @@ StorageSystemParts::StorageSystemParts(const std::string & name_) {"refcount", new DataTypeUInt32}, {"min_date", new DataTypeDate}, {"max_date", new DataTypeDate}, - {"min_block_number", new DataTypeUInt64}, - {"max_block_number", new DataTypeUInt64}, + {"min_block_number", new DataTypeInt64}, + {"max_block_number", new DataTypeInt64}, {"level", new DataTypeUInt32}, {"database", new DataTypeString}, @@ -169,8 +169,8 @@ BlockInputStreams StorageSystemParts::read( ColumnPtr refcount_column = new ColumnUInt32; ColumnPtr min_date_column = new ColumnUInt16; ColumnPtr max_date_column = new ColumnUInt16; - ColumnPtr min_block_number_column = new ColumnUInt64; - ColumnPtr max_block_number_column = new ColumnUInt64; + ColumnPtr min_block_number_column = new ColumnInt64; + ColumnPtr max_block_number_column = new ColumnInt64; ColumnPtr level_column = new ColumnUInt32; for (size_t i = 0; i < filtered_database_column->size();) @@ -227,7 +227,7 @@ BlockInputStreams StorageSystemParts::read( table_column->insert(table); engine_column->insert(engine); - mysqlxx::Date partition_date {part->left_month}; + mysqlxx::Date partition_date {part->month}; String partition = toString(partition_date.year()) + (partition_date.month() < 10 ? "0" : "") + toString(partition_date.month()); partition_column->insert(partition); @@ -263,8 +263,8 @@ BlockInputStreams StorageSystemParts::read( block.insert(ColumnWithTypeAndName(refcount_column, new DataTypeUInt32, "refcount")); block.insert(ColumnWithTypeAndName(min_date_column, new DataTypeDate, "min_date")); block.insert(ColumnWithTypeAndName(max_date_column, new DataTypeDate, "max_date")); - block.insert(ColumnWithTypeAndName(min_block_number_column, new DataTypeUInt64, "min_block_number")); - block.insert(ColumnWithTypeAndName(max_block_number_column, new DataTypeUInt64, "max_block_number")); + block.insert(ColumnWithTypeAndName(min_block_number_column, new DataTypeInt64, "min_block_number")); + block.insert(ColumnWithTypeAndName(max_block_number_column, new DataTypeInt64, "max_block_number")); block.insert(ColumnWithTypeAndName(level_column, new DataTypeUInt32, "level")); block.insert(ColumnWithTypeAndName(database_column, new DataTypeString, "database")); block.insert(ColumnWithTypeAndName(table_column, new DataTypeString, "table")); From 276ca03bc2662bd78d5902cd4849fbd3b9317f10 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Aug 2015 18:20:38 +0300 Subject: [PATCH 85/88] dbms: fixed error [#METR-17745]. --- .../MergeTree/MergedBlockOutputStream.h | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h index 1e74e4cbcd7..80e460c30ed 100644 --- a/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h +++ b/dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h @@ -392,11 +392,16 @@ private: ? descr.column_name : block.getByPosition(descr.column_number).name; - primary_columns_name_to_position[name] = i; + if (!primary_columns_name_to_position.emplace(name, i).second) + throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); primary_columns[i] = !descr.column_name.empty() ? block.getByName(descr.column_name) : block.getByPosition(descr.column_number); + + /// Столбцы первичного ключа переупорядочиваем заранее и складываем в primary_columns. + if (permutation) + primary_columns[i].column = primary_columns[i].column->permute(*permutation, 0); } /// Теперь пишем данные. @@ -406,12 +411,17 @@ private: if (permutation) { - ColumnPtr permutted_column = column.column->permute(*permutation, 0); - writeData(column.name, *column.type, *permutted_column, offset_columns); - auto primary_column_it = primary_columns_name_to_position.find(it.name); if (primary_columns_name_to_position.end() != primary_column_it) - primary_columns[primary_column_it->second] = ColumnWithTypeAndName{permutted_column, it.type, it.name}; + { + writeData(column.name, *column.type, *primary_columns[primary_column_it->second].column, offset_columns); + } + else + { + /// Столбцы, не входящие в первичный ключ, переупорядочиваем здесь; затем результат освобождается - для экономии оперативки. + ColumnPtr permutted_column = column.column->permute(*permutation, 0); + writeData(column.name, *column.type, *permutted_column, offset_columns); + } } else { @@ -422,12 +432,13 @@ private: /// Пишем индекс. Индекс содержит значение Primary Key для каждой index_granularity строки. for (size_t i = index_offset; i < rows; i += storage.index_granularity) { - for (const auto & primary_column : primary_columns) + if (storage.mode != MergeTreeData::Unsorted) { - if (storage.mode != MergeTreeData::Unsorted) + for (const auto & primary_column : primary_columns) + { index_vec.push_back((*primary_column.column)[i]); - - primary_column.type->serializeBinary(index_vec.back(), *index_stream); + primary_column.type->serializeBinary(index_vec.back(), *index_stream); + } } ++marks_count; From d192a76be8c51d519863fbe4238a374df0bb466a Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 18 Aug 2015 19:11:21 +0300 Subject: [PATCH 86/88] dbms: sequenceMatch: add iterations threshold with exception [#METR-17425] --- .../AggregateFunctionSequenceMatch.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h index 6a7b30ddbc7..255028c2d05 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -120,6 +120,10 @@ struct AggregateFunctionSequenceMatchData final } }; + +/// Max number of iterations to match the pattern against a sequence, exception thrown when exceeded +constexpr auto sequence_match_max_iterations = 1000000; + class AggregateFunctionSequenceMatch final : public IAggregateFunctionHelper { public: @@ -385,6 +389,7 @@ private: return false; }; + std::size_t i = 0; while (action_it != action_end && events_it != events_end) { // std::cout << "start_timestamp " << base_it->first << "; "; @@ -465,6 +470,12 @@ private: "Unknown PatternActionType", ErrorCodes::LOGICAL_ERROR }; + + if (++i > sequence_match_max_iterations) + throw Exception{ + "Pattern application proves too difficult, exceeding max iterations (" + toString(sequence_match_max_iterations) + ")", + ErrorCodes::TOO_SLOW + }; } /// if there are some actions remaining From bfd52af224d2b9fcb5bc175bd1450f0562afe629 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Aug 2015 23:44:54 +0300 Subject: [PATCH 87/88] dbms: added tests [#METR-17745]. --- .../0_stateless/00214_primary_key_order.reference | 9 +++++++++ .../0_stateless/00214_primary_key_order.sql | 13 +++++++++++++ .../00215_primary_key_order_zookeeper.reference | 5 +++++ .../00215_primary_key_order_zookeeper.sql | 14 ++++++++++++++ 4 files changed, 41 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00214_primary_key_order.reference create mode 100644 dbms/tests/queries/0_stateless/00214_primary_key_order.sql create mode 100644 dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.reference create mode 100644 dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.sql diff --git a/dbms/tests/queries/0_stateless/00214_primary_key_order.reference b/dbms/tests/queries/0_stateless/00214_primary_key_order.reference new file mode 100644 index 00000000000..f9a71b1af97 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00214_primary_key_order.reference @@ -0,0 +1,9 @@ +1 +2 +3 +b -3 +c -3 +c -2 +d -3 +d -2 +d -1 diff --git a/dbms/tests/queries/0_stateless/00214_primary_key_order.sql b/dbms/tests/queries/0_stateless/00214_primary_key_order.sql new file mode 100644 index 00000000000..b2c00fb1f63 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00214_primary_key_order.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS test.primary_key; +CREATE TABLE test.primary_key (d Date DEFAULT today(), x Int8) ENGINE = MergeTree(d, -x, 1); + +INSERT INTO test.primary_key (x) VALUES (1), (2), (3); + +SELECT x FROM test.primary_key ORDER BY x; + +SELECT 'a', -x FROM test.primary_key WHERE -x < -3; +SELECT 'b', -x FROM test.primary_key WHERE -x < -2; +SELECT 'c', -x FROM test.primary_key WHERE -x < -1; +SELECT 'd', -x FROM test.primary_key WHERE -x < toInt8(0); + +DROP TABLE test.primary_key; diff --git a/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.reference b/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.reference new file mode 100644 index 00000000000..7f43a43f889 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.reference @@ -0,0 +1,5 @@ +1 +2 +3 +2 +3 diff --git a/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.sql b/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.sql new file mode 100644 index 00000000000..cd86bc7aa15 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00215_primary_key_order_zookeeper.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test.primary_key; +CREATE TABLE test.primary_key (d Date DEFAULT today(), x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/primary_key', 'r1', d, -x, 1); + +INSERT INTO test.primary_key (x) VALUES (1), (2), (3); +INSERT INTO test.primary_key (x) VALUES (1), (3), (2); +INSERT INTO test.primary_key (x) VALUES (2), (1), (3); +INSERT INTO test.primary_key (x) VALUES (2), (3), (1); +INSERT INTO test.primary_key (x) VALUES (3), (1), (2); +INSERT INTO test.primary_key (x) VALUES (3), (2), (1); + +SELECT x FROM test.primary_key ORDER BY x; +SELECT x FROM test.primary_key WHERE -x < -1 ORDER BY x; + +DROP TABLE test.primary_key; From 330d0a99b096e3233d5acee1cfed3bf50fdc2f33 Mon Sep 17 00:00:00 2001 From: Anton Tikhonov Date: Wed, 19 Aug 2015 12:07:41 +0300 Subject: [PATCH 88/88] Merge --- dbms/include/DB/IO/ReadHelpers.h | 57 ++++++++++++-------------- dbms/src/IO/tests/parse_nums_check.cpp | 38 ----------------- 2 files changed, 27 insertions(+), 68 deletions(-) delete mode 100644 dbms/src/IO/tests/parse_nums_check.cpp diff --git a/dbms/include/DB/IO/ReadHelpers.h b/dbms/include/DB/IO/ReadHelpers.h index 1676c9e40ce..12302a33436 100644 --- a/dbms/include/DB/IO/ReadHelpers.h +++ b/dbms/include/DB/IO/ReadHelpers.h @@ -129,20 +129,18 @@ void readIntText(T & x, ReadBuffer & buf) if (buf.eof()) throwReadAfterEOF(); - if (*buf.position() == '-') - { - if (!std::is_signed::value) - return; - negative = true; - ++buf.position(); - } - else if (*buf.position() == '+') - ++buf.position(); - while (!buf.eof()) { switch (*buf.position()) { + case '+': + break; + case '-': + if (std::is_signed::value) + negative = true; + else + return; + break; case '0': case '1': case '2': @@ -218,9 +216,6 @@ void readIntTextUnsafe(T & x, ReadBuffer & buf) template void readFloatText(T & x, ReadBuffer & buf) { - /// Если вдруг тут перед каждым return надо будет еще что-то делать, типа домножать на экспоненту -- это можно сделать тут. -#define SCOPE_GUARDED_RETURN do { if (negative) x = -x; return; } while (0) - bool negative = false; x = 0; bool after_point = false; @@ -229,21 +224,16 @@ void readFloatText(T & x, ReadBuffer & buf) if (buf.eof()) throwReadAfterEOF(); - if (*buf.position() == '-') - { - negative = true; - ++buf.position(); - } - else if (*buf.position() == '+') - ++buf.position(); - while (!buf.eof()) { switch (*buf.position()) { + case '+': + break; + case '-': + negative = true; + break; case '.': - if (after_point) - SCOPE_GUARDED_RETURN; after_point = true; break; case '0': @@ -274,18 +264,24 @@ void readFloatText(T & x, ReadBuffer & buf) Int32 exponent = 0; readIntText(exponent, buf); x *= exp10(exponent); - SCOPE_GUARDED_RETURN; + if (negative) + x = -x; + return; } case 'i': ++buf.position(); assertString("nf", buf); x = std::numeric_limits::infinity(); - SCOPE_GUARDED_RETURN; + if (negative) + x = -x; + return; case 'I': ++buf.position(); assertString("NF", buf); x = std::numeric_limits::infinity(); - SCOPE_GUARDED_RETURN; + if (negative) + x = -x; + return; case 'n': ++buf.position(); assertString("an", buf); @@ -297,13 +293,14 @@ void readFloatText(T & x, ReadBuffer & buf) x = std::numeric_limits::quiet_NaN(); return; default: - SCOPE_GUARDED_RETURN; + if (negative) + x = -x; + return; } ++buf.position(); } - SCOPE_GUARDED_RETURN; - -#undef SCOPE_GUARDED_RETURN + if (negative) + x = -x; } diff --git a/dbms/src/IO/tests/parse_nums_check.cpp b/dbms/src/IO/tests/parse_nums_check.cpp deleted file mode 100644 index 3bde803aa9f..00000000000 --- a/dbms/src/IO/tests/parse_nums_check.cpp +++ /dev/null @@ -1,38 +0,0 @@ -#include -#include - -#include - -int main() -{ - const char input[] = "1 1.0 10.5 115e2 -5 -5.0 10- 7+8 90-3 .5 127.0.0.1 +1 +1-1"; - DB::ReadBuffer buf(const_cast(input), strlen(input), 0); - - Int64 i; - double f; - double Epsilon = 1e-10; - int t = 0; - -#define CHECK(x, y) do { DB::readText(x, buf); ++t; if (((x-y) > Epsilon) || (y-x) > Epsilon) return t; buf.ignore();} while (0); - CHECK(i, 1); - CHECK(f, 1.0f); - CHECK(f, 10.5f); - CHECK(f, 115e2); - CHECK(i, -5); - CHECK(f, -5); - CHECK(i, 10); - buf.ignore(); - CHECK(i, 7); - buf.ignore(2); - CHECK(i, 90); - buf.ignore(2); - /// Интересный случай: хотим ли мы, чтобы .5 парсилось как 0.5? Вроде бы это уместно. - CHECK(f, 0.5f); - CHECK(f, 127); - buf.ignore(4); // "0.1 " - CHECK(i, 1); - CHECK(i, 1); -#undef CHECK - - return 0; -}