From ef92ac4f3d7f1f9444c95369ba51f82f3aedd4b0 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 17:22:08 +0300 Subject: [PATCH 01/43] dbms: Server: Feature development. [#METR-17276] --- .../AggregateFunctionUniq.h | 43 ++++ .../DB/Common/CombinedCardinalityEstimator.h | 225 ++++++++++++++++++ dbms/include/DB/Common/HashTable/HashTable.h | 4 +- .../AggregateFunctionFactory.cpp | 21 ++ 4 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 dbms/include/DB/Common/CombinedCardinalityEstimator.h diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 9d063d8ea8b..146bb6a9394 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -117,6 +118,26 @@ struct AggregateFunctionUniqExactData }; +template +struct AggregateFunctionUniqCombinedData +{ + using Key = T; + using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + Set set; + + static String getName() { return "uniqCombined"; } +}; + +template <> +struct AggregateFunctionUniqCombinedData +{ + using Key = UInt64; + using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + Set set; + + static String getName() { return "uniqCombined"; } +}; + namespace detail { /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. @@ -166,6 +187,28 @@ namespace detail data.set.insert(key); } }; + + template + struct OneAdder > + { + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + if (data.set.isMedium()) + data.set.insert(static_cast &>(column).getData()[row_num]); + else + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } + }; + + template<> + struct OneAdder > + { + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } + }; } diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h new file mode 100644 index 00000000000..3feca851b09 --- /dev/null +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -0,0 +1,225 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +template +class CombinedCardinalityEstimator +{ +public: + using Self = CombinedCardinalityEstimator; + +private: + using Small = SmallSet; + using Medium = HashType; + using Large = HyperLogLogWithSmallSetOptimization; + enum class ContainerType { SMALL, MEDIUM, LARGE }; + +public: + ~CombinedCardinalityEstimator() + { + if (container_type == ContainerType::MEDIUM) + { + delete medium; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(medium)); + } + else if (container_type == ContainerType::LARGE) + { + delete large; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } + + void insert(Key value) + { + if (container_type == ContainerType::SMALL) + { + if (small.find(value) == small.end()) + { + if (!small.full()) + small.insert(value); + else + { + toMedium(); + medium->insert(value); + } + } + } + else if (container_type == ContainerType::MEDIUM) + { + if (medium->size() < medium_set_size) + medium->insert(value); + else + { + toLarge(); + large->insert(value); + } + } + else if (container_type == ContainerType::LARGE) + large->insert(value); + } + + UInt32 size() const + { + if (container_type == ContainerType::SMALL) + return small.size(); + else if (container_type == ContainerType::MEDIUM) + return medium->size(); + else if (container_type == ContainerType::LARGE) + return large->size(); + + return 0; + } + + void merge(const Self & rhs) + { + ContainerType res = max(container_type, rhs.container_type); + + if (container_type != res) + { + if (res == ContainerType::MEDIUM) + toMedium(); + else if (res == ContainerType::LARGE) + toLarge(); + } + + if (container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (container_type == ContainerType::MEDIUM) + { + if (rhs.container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (rhs.container_type == ContainerType::MEDIUM) + { + for (const auto & x : *rhs.medium) + insert(x); + } + } + else if (container_type == ContainerType::LARGE) + { + if (rhs.container_type == ContainerType::SMALL) + { + for (const auto & x : rhs.small) + insert(x); + } + else if (rhs.container_type == ContainerType::MEDIUM) + { + for (const auto & x : *rhs.medium) + insert(x); + } + else if (rhs.container_type == ContainerType::LARGE) + large->merge(*rhs.large); + } + } + + void read(DB::ReadBuffer & in) + { + UInt8 v; + readBinary(v, in); + ContainerType t = static_cast(v); + + if (t == ContainerType::SMALL) + small.read(in); + else if (t == ContainerType::MEDIUM) + { + toMedium(); + medium->read(in); + } + else if (t == ContainerType::LARGE) + { + toLarge(); + large->read(in); + } + } + + void readAndMerge(DB::ReadBuffer & in) + { + Self other; + other.read(in); + merge(other); + } + + void write(DB::WriteBuffer & out) const + { + UInt8 v = static_cast(container_type); + writeBinary(v, out); + + if (container_type == ContainerType::SMALL) + small.write(out); + else if (container_type == ContainerType::MEDIUM) + medium->write(out); + else if (container_type == ContainerType::LARGE) + large->write(out); + } + + bool isMedium() const + { + return container_type == ContainerType::MEDIUM; + } + +private: + void toMedium() + { + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); + + Medium * tmp_medium = new Medium; + + for (const auto & x : small) + tmp_medium->insert(x); + + medium = tmp_medium; + + container_type = ContainerType::MEDIUM; + } + + void toLarge() + { + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + Large * tmp_large = new Large; + + for (const auto & x : *medium) + tmp_large->insert(x); + + large = tmp_large; + + delete medium; + medium = nullptr; + + if (current_memory_tracker) + current_memory_tracker->free(sizeof(medium)); + + container_type = ContainerType::LARGE; + } + + ContainerType max(const ContainerType & lhs, const ContainerType & rhs) + { + unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + return static_cast(res); + } + +private: + ContainerType container_type = ContainerType::SMALL; + const UInt32 medium_set_size = 1UL << medium_set_power; + Small small; + Medium * medium = nullptr; + Large * large = nullptr; +}; + +} diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 1fdee83c54b..67196746ae5 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -757,7 +757,7 @@ public: { Cell x; x.read(rb); - insert(x); + insert(Cell::getKey(x.getValue())); } } @@ -781,7 +781,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(x); + insert(Cell::getKey(x.getValue())); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 39464720135..4676d21bdda 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -351,6 +351,26 @@ AggregateFunctionPtr AggregateFunctionFactory::get(const String & name, const Da else throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + else if (name == "uniqCombined") + { + if (argument_types.size() != 1) + throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res = createWithNumericType(*argument_types[0]); + + if (res) + return res; + else if (typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else if (typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else if (typeid_cast(&argument_type) || typeid_cast(&argument_type)) + return new AggregateFunctionUniq>; + else + throw Exception("Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } else if (name == "uniqUpTo") { if (argument_types.size() != 1) @@ -706,6 +726,7 @@ const AggregateFunctionFactory::FunctionNames & AggregateFunctionFactory::getFun "uniq", "uniqHLL12", "uniqExact", + "uniqCombined", "uniqUpTo", "groupArray", "groupUniqArray", From 242658c005abf53b9a0b265dfde4c0d74b8be86b Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 17:54:56 +0300 Subject: [PATCH 02/43] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 3feca851b09..5b47dcae1af 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -65,6 +65,8 @@ public: } else if (container_type == ContainerType::LARGE) large->insert(value); + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const @@ -75,8 +77,8 @@ public: return medium->size(); else if (container_type == ContainerType::LARGE) return large->size(); - - return 0; + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) @@ -124,6 +126,8 @@ public: else if (rhs.container_type == ContainerType::LARGE) large->merge(*rhs.large); } + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void read(DB::ReadBuffer & in) @@ -144,6 +148,8 @@ public: toLarge(); large->read(in); } + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) @@ -155,7 +161,7 @@ public: void write(DB::WriteBuffer & out) const { - UInt8 v = static_cast(container_type); + UInt8 v = static_cast(container_type); writeBinary(v, out); if (container_type == ContainerType::SMALL) @@ -164,6 +170,8 @@ public: medium->write(out); else if (container_type == ContainerType::LARGE) large->write(out); + else + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } bool isMedium() const From 92adfc9e0ea04c59f3d6d84ecc36ec7717e91edd Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 18:43:24 +0300 Subject: [PATCH 03/43] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 107 ++++++++++-------- 1 file changed, 60 insertions(+), 47 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 5b47dcae1af..0793f82e94d 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -4,32 +4,45 @@ #include #include + namespace DB { -template +namespace details +{ + +enum class ContainerType { SMALL, MEDIUM, LARGE }; + +ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +{ + unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + return static_cast(res); +} + +} + +template class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: - using Small = SmallSet; - using Medium = HashType; - using Large = HyperLogLogWithSmallSetOptimization; - enum class ContainerType { SMALL, MEDIUM, LARGE }; + using Small = SmallSet; + using Medium = HashContainer; + using Large = HyperLogLogWithSmallSetOptimization; public: ~CombinedCardinalityEstimator() { - if (container_type == ContainerType::MEDIUM) + if (container_type == details::ContainerType::MEDIUM) { delete medium; if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { delete large; @@ -40,7 +53,7 @@ public: void insert(Key value) { - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) { @@ -53,9 +66,9 @@ public: } } } - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size) + if (medium->size() < medium_set_size_max) medium->insert(value); else { @@ -63,7 +76,7 @@ public: large->insert(value); } } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) large->insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -71,11 +84,11 @@ public: UInt32 size() const { - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) return small.size(); - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) return medium->size(); - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) return large->size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -83,47 +96,47 @@ public: void merge(const Self & rhs) { - ContainerType res = max(container_type, rhs.container_type); + details::ContainerType max_container_type = details::max(container_type, rhs.container_type); - if (container_type != res) + if (container_type != max_container_type) { - if (res == ContainerType::MEDIUM) + if (max_container_type == details::ContainerType::MEDIUM) toMedium(); - else if (res == ContainerType::LARGE) + else if (max_container_type == details::ContainerType::LARGE) toLarge(); } - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { - if (rhs.container_type == ContainerType::SMALL) + if (rhs.container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == ContainerType::MEDIUM) + else if (rhs.container_type == details::ContainerType::MEDIUM) { for (const auto & x : *rhs.medium) insert(x); } } - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { - if (rhs.container_type == ContainerType::SMALL) + if (rhs.container_type == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == ContainerType::MEDIUM) + else if (rhs.container_type == details::ContainerType::MEDIUM) { for (const auto & x : *rhs.medium) insert(x); } - else if (rhs.container_type == ContainerType::LARGE) + else if (rhs.container_type == details::ContainerType::LARGE) large->merge(*rhs.large); } else @@ -134,16 +147,16 @@ public: { UInt8 v; readBinary(v, in); - ContainerType t = static_cast(v); + details::ContainerType t = static_cast(v); - if (t == ContainerType::SMALL) + if (t == details::ContainerType::SMALL) small.read(in); - else if (t == ContainerType::MEDIUM) + else if (t == details::ContainerType::MEDIUM) { toMedium(); medium->read(in); } - else if (t == ContainerType::LARGE) + else if (t == details::ContainerType::LARGE) { toLarge(); large->read(in); @@ -164,11 +177,11 @@ public: UInt8 v = static_cast(container_type); writeBinary(v, out); - if (container_type == ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.write(out); - else if (container_type == ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) medium->write(out); - else if (container_type == ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) large->write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -176,12 +189,17 @@ public: bool isMedium() const { - return container_type == ContainerType::MEDIUM; + return container_type == details::ContainerType::MEDIUM; } private: void toMedium() { + if (container_type != details::ContainerType::SMALL) + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + + container_type = details::ContainerType::MEDIUM; + if (current_memory_tracker) current_memory_tracker->alloc(sizeof(medium)); @@ -191,12 +209,15 @@ private: tmp_medium->insert(x); medium = tmp_medium; - - container_type = ContainerType::MEDIUM; } void toLarge() { + if (container_type != details::ContainerType::MEDIUM) + throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + + container_type = details::ContainerType::LARGE; + if (current_memory_tracker) current_memory_tracker->alloc(sizeof(large)); @@ -212,22 +233,14 @@ private: if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); - - container_type = ContainerType::LARGE; - } - - ContainerType max(const ContainerType & lhs, const ContainerType & rhs) - { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); - return static_cast(res); } private: - ContainerType container_type = ContainerType::SMALL; - const UInt32 medium_set_size = 1UL << medium_set_power; Small small; Medium * medium = nullptr; Large * large = nullptr; + const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; + details::ContainerType container_type = details::ContainerType::SMALL; }; } From 7ce91cc59a9cb88efb7d6e729ff7cda680fe5c69 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 19:16:56 +0300 Subject: [PATCH 04/43] dbms: Server: Feature development. [#METR-17276] --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 0793f82e94d..b0377858026 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -21,6 +21,10 @@ ContainerType max(const ContainerType & lhs, const ContainerType & rhs) } +/** Для маленького количества ключей - массив фиксированного размера "на стеке". + * Для среднего - выделяется HashSet. + * Для большого - выделяется HyperLogLog. + */ template class CombinedCardinalityEstimator { @@ -143,6 +147,7 @@ public: throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } + /// Можно вызывать только для пустого объекта. void read(DB::ReadBuffer & in) { UInt8 v; From f95d090a45c81030b0ee4d0eec74a58d4cce0e0f Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 20 Jul 2015 20:07:51 +0300 Subject: [PATCH 05/43] dbms: Server: Feature development. [#METR-17276] --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index b0377858026..a3d15a8c155 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -218,7 +218,7 @@ private: void toLarge() { - if (container_type != details::ContainerType::MEDIUM) + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); container_type = details::ContainerType::LARGE; From 45d8bebff66e60f1af0ca5b843cb7ec4223ccfc1 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 22 Jul 2015 17:12:34 +0300 Subject: [PATCH 06/43] dbms: Server: Squeezed most of the fields into a 8-byte memory chunk. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 171 ++++++++++++------ 1 file changed, 113 insertions(+), 58 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 43c11380668..cef45658521 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -11,9 +12,9 @@ namespace DB namespace details { -enum class ContainerType { SMALL, MEDIUM, LARGE }; +enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; -ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); @@ -37,26 +38,20 @@ private: using Large = HyperLogLogWithSmallSetOptimization; public: + CombinedCardinalityEstimator() + { + setContainerType(details::ContainerType::SMALL); + } + ~CombinedCardinalityEstimator() { - if (container_type == details::ContainerType::MEDIUM) - { - delete medium; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(medium)); - } - else if (container_type == details::ContainerType::LARGE) - { - delete large; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(large)); - } + destroy(); } void insert(Key value) { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) @@ -66,41 +61,44 @@ public: else { toMedium(); - medium->insert(value); + getObject()->insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size_max) - medium->insert(value); + if (getObject()->size() < medium_set_size_max) + getObject()->insert(value); else { toLarge(); - large->insert(value); + getObject()->insert(value); } } else if (container_type == details::ContainerType::LARGE) - large->insert(value); + getObject()->insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return medium->size(); + return getObject()->size(); else if (container_type == details::ContainerType::LARGE) - return large->size(); + return getObject()->size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) { - details::ContainerType max_container_type = details::max(container_type, rhs.container_type); + auto container_type = getContainerType(); + auto max_container_type = details::max(container_type, rhs.getContainerType()); if (container_type != max_container_type) { @@ -117,31 +115,31 @@ public: } else if (container_type == details::ContainerType::MEDIUM) { - if (rhs.container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.medium) + for (const auto & x : *rhs.getObject()) insert(x); } } else if (container_type == details::ContainerType::LARGE) { - if (rhs.container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (rhs.container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.medium) + for (const auto & x : *rhs.getObject()) insert(x); } - else if (rhs.container_type == details::ContainerType::LARGE) - large->merge(*rhs.large); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getObject()->merge(*rhs.getObject()); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -159,12 +157,12 @@ public: else if (t == details::ContainerType::MEDIUM) { toMedium(); - medium->read(in); + getObject()->read(in); } else if (t == details::ContainerType::LARGE) { toLarge(); - large->read(in); + getObject()->read(in); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -179,52 +177,57 @@ public: void write(DB::WriteBuffer & out) const { + auto container_type = getContainerType(); + UInt8 v = static_cast(container_type); writeBinary(v, out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - medium->write(out); + getObject()->write(out); else if (container_type == details::ContainerType::LARGE) - large->write(out); + getObject()->write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } bool isMedium() const { - return container_type == details::ContainerType::MEDIUM; + return getContainerType() == details::ContainerType::MEDIUM; } private: void toMedium() { - if (container_type != details::ContainerType::SMALL) + if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(medium)); - - Medium * tmp_medium = new Medium; + auto tmp_medium = std::make_unique(); for (const auto & x : small) tmp_medium->insert(x); - medium = tmp_medium; + new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - container_type = details::ContainerType::MEDIUM; + std::atomic_signal_fence(std::memory_order_seq_cst); + + setContainerType(details::ContainerType::MEDIUM); + + std::atomic_signal_fence(std::memory_order_seq_cst); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); } void toLarge() { + auto container_type = getContainerType(); + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(large)); - - Large * tmp_large = new Large; + auto tmp_large = std::make_unique(); if (container_type == details::ContainerType::SMALL) { @@ -233,30 +236,82 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *medium) + for (const auto & x : *getObject()) tmp_large->insert(x); + + destroy(); } - large = tmp_large; + new (&large) std::unique_ptr{ std::move(tmp_large) }; + + std::atomic_signal_fence(std::memory_order_seq_cst); + + setContainerType(details::ContainerType::LARGE); + + std::atomic_signal_fence(std::memory_order_seq_cst); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + } + + void NO_INLINE destroy() + { + auto container_type = getContainerType(); + + clearContainerType(); if (container_type == details::ContainerType::MEDIUM) { - delete medium; - medium = nullptr; - + medium.std::unique_ptr::~unique_ptr(); if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } + else if (container_type == details::ContainerType::LARGE) + { + large.std::unique_ptr::~unique_ptr(); + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } - container_type = details::ContainerType::LARGE; + template + T * getObject() + { + return reinterpret_cast(address & mask); + } + + template + const T * getObject() const + { + return reinterpret_cast(address & mask); + } + + void setContainerType(details::ContainerType t) + { + address |= static_cast(t); + } + + details::ContainerType getContainerType() const + { + return static_cast(address & ~mask); + } + + void clearContainerType() + { + address &= mask; } private: Small small; - Medium * medium = nullptr; - Large * large = nullptr; - const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; - details::ContainerType container_type = details::ContainerType::SMALL; + union + { + std::unique_ptr medium; + std::unique_ptr large; + UInt64 address = 0; + }; + static const UInt64 mask = 0xFFFFFFFC; + static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; } From 4bf6fe20706393dd89d80037b65a95ef9afe8612 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 22 Jul 2015 18:25:59 +0300 Subject: [PATCH 07/43] dbms: Server: Feature development. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 53 +++++++++---------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index cef45658521..014903c0db0 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -61,22 +61,23 @@ public: else { toMedium(); - getObject()->insert(value); + getContainer().insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (getObject()->size() < medium_set_size_max) - getObject()->insert(value); + auto & container = getContainer(); + if (container.size() < medium_set_size_max) + container.insert(value); else { toLarge(); - getObject()->insert(value); + getContainer().insert(value); } } else if (container_type == details::ContainerType::LARGE) - getObject()->insert(value); + getContainer().insert(value); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -88,9 +89,9 @@ public: if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return getObject()->size(); + return getContainer().size(); else if (container_type == details::ContainerType::LARGE) - return getObject()->size(); + return getContainer().size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -122,7 +123,7 @@ public: } else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.getObject()) + for (const auto & x : rhs.getContainer()) insert(x); } } @@ -135,11 +136,11 @@ public: } else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - for (const auto & x : *rhs.getObject()) + for (const auto & x : rhs.getContainer()) insert(x); } else if (rhs.getContainerType() == details::ContainerType::LARGE) - getObject()->merge(*rhs.getObject()); + getContainer().merge(rhs.getContainer()); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -150,19 +151,19 @@ public: { UInt8 v; readBinary(v, in); - details::ContainerType t = static_cast(v); + auto container_type = static_cast(v); - if (t == details::ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.read(in); - else if (t == details::ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { toMedium(); - getObject()->read(in); + getContainer().read(in); } - else if (t == details::ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { toLarge(); - getObject()->read(in); + getContainer().read(in); } else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); @@ -178,16 +179,14 @@ public: void write(DB::WriteBuffer & out) const { auto container_type = getContainerType(); - - UInt8 v = static_cast(container_type); - writeBinary(v, out); + writeBinary(static_cast(container_type), out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - getObject()->write(out); + getContainer().write(out); else if (container_type == details::ContainerType::LARGE) - getObject()->write(out); + getContainer().write(out); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } @@ -236,7 +235,7 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *getObject()) + for (const auto & x : getContainer()) tmp_large->insert(x); destroy(); @@ -276,15 +275,15 @@ private: } template - T * getObject() + T & getContainer() { - return reinterpret_cast(address & mask); + return *reinterpret_cast(address & mask); } template - const T * getObject() const + const T & getContainer() const { - return reinterpret_cast(address & mask); + return *reinterpret_cast(address & mask); } void setContainerType(details::ContainerType t) @@ -292,7 +291,7 @@ private: address |= static_cast(t); } - details::ContainerType getContainerType() const + inline details::ContainerType getContainerType() const { return static_cast(address & ~mask); } From b3bcc4f6688fa1f7f7eb8075b0d4efa1aaf9eef2 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 24 Jul 2015 19:46:00 +0300 Subject: [PATCH 08/43] dbms: Server: Various optimizations. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 91 +++++++++++-------- dbms/include/DB/Common/HashTable/HashTable.h | 46 ++++++++++ dbms/include/DB/Common/HashTable/SmallTable.h | 50 ++++++++++ .../HyperLogLogWithSmallSetOptimization.h | 18 +++- dbms/include/DB/Core/ErrorCodes.h | 1 + 5 files changed, 163 insertions(+), 43 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 014903c0db0..b7b3fb50552 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include @@ -16,7 +16,7 @@ enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + UInt8 res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); } @@ -26,16 +26,25 @@ static inline ContainerType max(const ContainerType & lhs, const ContainerType & * Для среднего - выделяется HashSet. * Для большого - выделяется HyperLogLog. */ -template +template +< + typename Key, + typename HashContainer, + UInt8 small_set_size_max, + UInt8 medium_set_power2_max, + UInt8 K, + typename Hash = IntHash32, + typename DenominatorType = float +> class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: using Small = SmallSet; using Medium = HashContainer; - using Large = HyperLogLogWithSmallSetOptimization; + using Large = HyperLogLogCounter; public: CombinedCardinalityEstimator() @@ -109,41 +118,18 @@ public: toLarge(); } - if (container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - if (rhs.getContainerType() == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::MEDIUM) - { - for (const auto & x : rhs.getContainer()) - insert(x); - } + for (const auto & x : rhs.getContainer()) + insert(x); } - else if (container_type == details::ContainerType::LARGE) - { - if (rhs.getContainerType() == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::MEDIUM) - { - for (const auto & x : rhs.getContainer()) - insert(x); - } - else if (rhs.getContainerType() == details::ContainerType::LARGE) - getContainer().merge(rhs.getContainer()); - } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getContainer().merge(rhs.getContainer()); } /// Можно вызывать только для пустого объекта. @@ -171,9 +157,36 @@ public: void readAndMerge(DB::ReadBuffer & in) { - Self other; - other.read(in); - merge(other); + auto container_type = getContainerType(); + + UInt8 v; + readBinary(v, in); + auto rhs_container_type = static_cast(v); + + auto max_container_type = details::max(container_type, rhs_container_type); + + if (container_type != max_container_type) + { + if (max_container_type == details::ContainerType::MEDIUM) + toMedium(); + else if (max_container_type == details::ContainerType::LARGE) + toLarge(); + } + + if (rhs_container_type == details::ContainerType::SMALL) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::MEDIUM) + { + typename Medium::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::LARGE) + getContainer().readAndMerge(in); } void write(DB::WriteBuffer & out) const @@ -275,13 +288,13 @@ private: } template - T & getContainer() + inline T & getContainer() { return *reinterpret_cast(address & mask); } template - const T & getContainer() const + inline const T & getContainer() const { return *reinterpret_cast(address & mask); } diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 67196746ae5..2ce34905d69 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -251,6 +251,7 @@ class HashTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; template friend class TwoLevelHashTable; @@ -429,6 +430,51 @@ public: free(); } + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return Cell::getKey(cell.getValue()); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; class iterator { diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 10ec8479b93..521fe117845 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -27,6 +27,7 @@ class SmallTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; typedef SmallTable Self; typedef Cell cell_type; @@ -66,6 +67,55 @@ public: typedef typename Cell::value_type value_type; + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + + if (size > capacity) + throw DB::Exception("Illegal size"); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return Cell::getKey(cell.getValue()); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; + class iterator { Self * container; diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 7932ddfb0e8..405f7c5ca12 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -114,10 +114,20 @@ public: void readAndMerge(DB::ReadBuffer & in) { - /// Немного не оптимально. - HyperLogLogWithSmallSetOptimization other; - other.read(in); - merge(other); + bool is_rhs_large; + readBinary(is_rhs_large, in); + + if (!isLarge() && is_rhs_large) + toLarge(); + + if (!is_rhs_large) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else + large->readAndMerge(in); } void write(DB::WriteBuffer & out) const diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 937b06d5ce0..8fae35ea601 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -283,6 +283,7 @@ namespace ErrorCodes INDEX_NOT_USED = 277, LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, + NO_AVAILABLE_DATA = 280, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, From 4da092418eabaecb0f40857654f0782da696b986 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 17:14:44 +0300 Subject: [PATCH 09/43] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/Common/HashTable/HashTable.h | 6 +++--- dbms/include/DB/Common/HashTable/SmallTable.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 2ce34905d69..0b216e1ca0e 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -465,7 +465,7 @@ public: if ((read_count == 0) || is_eof) throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); - return Cell::getKey(cell.getValue()); + return cell.getValue(); } private: @@ -803,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } @@ -827,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 521fe117845..c68963a4798 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -105,7 +105,7 @@ public: if ((read_count == 0) || is_eof) throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); - return Cell::getKey(cell.getValue()); + return cell.getValue(); } private: From 99e2b241c0cb179e0154f52fc11fc9387725cbf9 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 17:53:34 +0300 Subject: [PATCH 10/43] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 5 +---- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 5 ----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 146bb6a9394..a5a7b3f795d 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -193,10 +193,7 @@ namespace detail { static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) { - if (data.set.isMedium()) - data.set.insert(static_cast &>(column).getData()[row_num]); - else - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + data.set.insert(static_cast &>(column).getData()[row_num]); } }; diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index b7b3fb50552..e5cab187e68 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -204,11 +204,6 @@ public: throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } - bool isMedium() const - { - return getContainerType() == details::ContainerType::MEDIUM; - } - private: void toMedium() { From 98557ba369db99f4fc82e2b2db19e8df5dacb926 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:06:39 +0300 Subject: [PATCH 11/43] dbms: Server: Feed CombinedCardinalityEstimator with at most 32-bit wide values. [#METR-17276] --- .../AggregateFunctionUniq.h | 61 ++++++++++++++++++- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index a5a7b3f795d..44d4d7bc4b2 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -122,7 +122,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +132,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19>; Set set; static String getName() { return "uniqCombined"; } @@ -140,6 +140,60 @@ struct AggregateFunctionUniqCombinedData namespace detail { + template + struct Hash64To32; + + template + struct Hash64To32::value || std::is_same::value>::type> + { + static UInt32 compute(T key) + { + using U = typename std::make_unsigned::type; + auto x = static_cast(key); + + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } + }; + + template + struct CombinedCardinalityTraits + { + static UInt32 hash(T key) + { + return key; + } + }; + + template + struct CombinedCardinalityTraits::value || std::is_same::value>::type> + { + using Op = Hash64To32; + + static UInt32 hash(T key) + { + return Op::compute(key); + }; + }; + + template + struct CombinedCardinalityTraits::value>::type> + { + using Op = Hash64To32; + + static UInt32 hash(T key) + { + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } + }; + /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. * Используется для частичной специализации для добавления строк. */ @@ -193,7 +247,8 @@ namespace detail { static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) { - data.set.insert(static_cast &>(column).getData()[row_num]); + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); } }; From b17d3d79fc493cc6fdfbf06142f24cfc5bd534e7 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:13:02 +0300 Subject: [PATCH 12/43] dbms: Server: Updated comment. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 44d4d7bc4b2..d61e99c2c86 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -146,6 +146,7 @@ namespace detail template struct Hash64To32::value || std::is_same::value>::type> { + /// https://gist.github.com/badboy/6267743 static UInt32 compute(T key) { using U = typename std::make_unsigned::type; From 3d301bb74939818e648c8803b3a413f73dd4254a Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 29 Jul 2015 20:22:17 +0300 Subject: [PATCH 13/43] dbms: Server: Use trivial has for HLL12 case. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index d61e99c2c86..20b804d08a5 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -122,7 +122,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +132,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } From 3251f5978a4221d8f2186904c08cd44ed1c1d364 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Thu, 30 Jul 2015 20:04:49 +0300 Subject: [PATCH 14/43] dbms: Server: Made HyperLogLog implementation more understandable. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index e5cab187e68..00a01232b31 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -87,8 +87,6 @@ public: } else if (container_type == details::ContainerType::LARGE) getContainer().insert(value); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } UInt32 size() const @@ -151,8 +149,6 @@ public: toLarge(); getContainer().read(in); } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) @@ -200,8 +196,6 @@ public: getContainer().write(out); else if (container_type == details::ContainerType::LARGE) getContainer().write(out); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } private: @@ -217,12 +211,8 @@ private: new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - std::atomic_signal_fence(std::memory_order_seq_cst); - setContainerType(details::ContainerType::MEDIUM); - std::atomic_signal_fence(std::memory_order_seq_cst); - if (current_memory_tracker) current_memory_tracker->alloc(sizeof(medium)); } @@ -251,12 +241,8 @@ private: new (&large) std::unique_ptr{ std::move(tmp_large) }; - std::atomic_signal_fence(std::memory_order_seq_cst); - setContainerType(details::ContainerType::LARGE); - std::atomic_signal_fence(std::memory_order_seq_cst); - if (current_memory_tracker) current_memory_tracker->alloc(sizeof(large)); From b32721432bc78bd7077b4414319418304afd2b39 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 3 Aug 2015 20:32:37 +0300 Subject: [PATCH 15/43] dbms: Server: Added stateless functional test (stateful one is in progress). [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 416 ++++++++++++++++++ .../00211_aggregate_function_uniq.sql | 35 ++ 2 files changed, 451 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference new file mode 100644 index 00000000000..64f3c19bb38 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -0,0 +1,416 @@ +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 159 +1 164 +3 165 +6 162 +7 160 +9 164 +10 81 +11 158 +13 161 +14 160 +17 163 +19 164 +20 159 +21 161 +22 159 +26 160 +31 164 +35 160 +36 161 +0 54571 +1 55013 +3 52912 +6 52353 +7 54011 +9 54138 +10 26870 +11 54554 +13 53951 +14 53396 +17 55227 +19 55115 +20 54370 +21 54268 +22 54620 +26 53394 +31 54151 +35 54328 +36 52997 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 161 +0.125 160 +0.5 164 +0.05 164 +0.143 162 +0.091 81 +0.056 163 +0.048 159 +0.083 158 +0.25 165 +1 159 +0.1 164 +0.028 160 +0.027 161 +0.031 164 +0.067 160 +0.043 159 +0.037 160 +0.071 161 +0.045 54268 +0.125 54011 +0.5 55013 +0.05 55115 +0.143 52353 +0.091 26870 +0.056 55227 +0.048 54370 +0.083 54554 +0.25 52912 +1 54571 +0.1 54138 +0.028 54328 +0.027 52997 +0.031 54151 +0.067 53396 +0.043 54620 +0.037 53394 +0.071 53951 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 164 +0.05 164 +0.25 165 +0.048 159 +0.091 81 +0.043 159 +0.071 161 +0.083 158 +0.125 160 +0.031 164 +0.143 162 +0.028 160 +0.067 160 +0.045 161 +0.027 161 +0.056 163 +0.037 160 +0.1 164 +1 159 +0.5 55013 +0.05 55115 +0.25 52912 +0.048 54370 +0.091 26870 +0.043 54620 +0.071 53951 +0.083 54554 +0.125 54011 +0.031 54151 +0.143 52353 +0.028 54328 +0.067 53396 +0.045 54268 +0.027 52997 +0.056 55227 +0.037 53394 +0.1 54138 +1 54571 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 158 +3 162 +6 163 +7 162 +9 162 +10 79 +11 162 +13 163 +14 160 +17 163 +19 158 +20 162 +21 157 +22 164 +26 162 +31 161 +35 162 +36 163 +0 54029 +1 53772 +3 53540 +6 54012 +7 53910 +9 52761 +10 26462 +11 52701 +13 54505 +14 53790 +17 54064 +19 55420 +20 56686 +21 52639 +22 54251 +26 53827 +31 53574 +35 55022 +36 53961 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54053 +6 54054 +7 54054 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54053 +22 54054 +26 54054 +31 54054 +35 54054 +36 54053 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54053 +0.125 54054 +0.5 54054 +0.05 54054 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54054 +0.083 54055 +0.25 54053 +1 54054 +0.1 54053 +0.028 54054 +0.027 54053 +0.031 54054 +0.067 54054 +0.043 54054 +0.037 54054 +0.071 54054 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54054 +0.25 54053 +0.048 54054 +0.091 27027 +0.043 54054 +0.071 54054 +0.083 54055 +0.125 54054 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54053 +0.027 54053 +0.056 54054 +0.037 54054 +0.1 54053 +1 54054 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql new file mode 100644 index 00000000000..2886daeb3b3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql @@ -0,0 +1,35 @@ +/* uniqHLL12 */ + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +/* uniqCombined */ + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From 70f4887d747620d53fe7bb842578adac39ed0efb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 3 Aug 2015 23:11:57 +0300 Subject: [PATCH 16/43] dbms: tiny modifications [#METR-2944]. --- dbms/src/Core/Block.cpp | 2 +- dbms/src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index fa164d12a18..d14cec43222 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -308,7 +308,7 @@ std::string Block::dumpStructure() const if (it->column) res << ' ' << it->column->getName() << ' ' << it->column->size(); else - res << "nullptr"; + res << " nullptr"; } return res.str(); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index a6c66a015d3..9430247bafd 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -403,7 +403,7 @@ void InterpreterSelectQuery::executeSingleQuery() bool has_having = false; bool has_order_by = false; - ExpressionActionsPtr before_join; + ExpressionActionsPtr before_join; /// включая JOIN ExpressionActionsPtr before_where; ExpressionActionsPtr before_aggregation; ExpressionActionsPtr before_having; From 99aec7b6c132aa86e2c70216dd49c93fa7585bc3 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 14:22:13 +0300 Subject: [PATCH 17/43] dbms: Server: Fixed timeout value. [#METR-14410] --- dbms/src/Client/ParallelReplicas.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/dbms/src/Client/ParallelReplicas.cpp b/dbms/src/Client/ParallelReplicas.cpp index 135c1b06aa0..538d4cf2caf 100644 --- a/dbms/src/Client/ParallelReplicas.cpp +++ b/dbms/src/Client/ParallelReplicas.cpp @@ -263,9 +263,8 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() Poco::Net::Socket::SocketList read_list; read_list.reserve(active_replica_count); - /** Сначала проверяем, есть ли данные, которые уже лежат в буфере - * хоть одного соединения. - */ + /// Сначала проверяем, есть ли данные, которые уже лежат в буфере + /// хоть одного соединения. for (auto & e : replica_map) { Connection * connection = e.second; @@ -273,9 +272,8 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() read_list.push_back(connection->socket); } - /** Если не было найдено никаких данных, то проверяем, есть ли соединения - * готовые для чтения. - */ + /// Если не было найдено никаких данных, то проверяем, есть ли соединения + /// готовые для чтения. if (read_list.empty()) { Poco::Net::Socket::SocketList write_list; @@ -287,7 +285,7 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() if (connection != nullptr) read_list.push_back(connection->socket); } - int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->poll_interval * 1000000); + int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->receive_timeout); if (n == 0) return replica_map.end(); } From 58e2a7ef6b7fb6d1b81775c1339020ecf6e6d970 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 15:33:08 +0300 Subject: [PATCH 18/43] dbms: Server: Removed useless hashing in the case of 32-bit floating point values. [#METR-17276] --- .../DB/AggregateFunctions/AggregateFunctionUniq.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 20b804d08a5..c581a811956 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -183,7 +183,7 @@ namespace detail }; template - struct CombinedCardinalityTraits::value>::type> + struct CombinedCardinalityTraits::value>::type> { using Op = Hash64To32; @@ -195,6 +195,17 @@ namespace detail } }; + template + struct CombinedCardinalityTraits::value>::type> + { + static UInt32 hash(T key) + { + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } + }; + /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. * Используется для частичной специализации для добавления строк. */ From 891aed0744729a6f45ec3a519d9c13162795a762 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 4 Aug 2015 16:37:56 +0300 Subject: [PATCH 19/43] dbms: Server: Cosmetic changes. [#METR-17276] --- .../AggregateFunctionUniq.h | 280 +++++++++--------- 1 file changed, 145 insertions(+), 135 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index c581a811956..08f28a14be8 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -25,32 +25,7 @@ namespace DB { - -template struct AggregateFunctionUniqTraits -{ - static UInt64 hash(T x) { return x; } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float32 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float64 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - +/// uniq struct AggregateFunctionUniqUniquesHashSetData { @@ -60,6 +35,7 @@ struct AggregateFunctionUniqUniquesHashSetData static String getName() { return "uniq"; } }; +/// uniqHLL12 template struct AggregateFunctionUniqHLL12Data @@ -79,6 +55,7 @@ struct AggregateFunctionUniqHLL12Data static String getName() { return "uniqHLL12"; } }; +/// uniqExact template struct AggregateFunctionUniqExactData @@ -140,139 +117,172 @@ struct AggregateFunctionUniqCombinedData namespace detail { - template - struct Hash64To32; - template - struct Hash64To32::value || std::is_same::value>::type> +/** Хэширование 64-битных целочисленных значений в 32-битные. + * Источник: https://gist.github.com/badboy/6267743 + */ +template +struct Hash64To32; + +template +struct Hash64To32::value || std::is_same::value>::type> +{ + static UInt32 compute(T key) { - /// https://gist.github.com/badboy/6267743 - static UInt32 compute(T key) - { - using U = typename std::make_unsigned::type; - auto x = static_cast(key); + using U = typename std::make_unsigned::type; + auto x = static_cast(key); - x = (~x) + (x << 18); - x = x ^ (x >> 31); - x = x * 21; - x = x ^ (x >> 11); - x = x + (x << 6); - x = x ^ (x >> 22); - return static_cast(x); - } - }; + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } +}; - template - struct CombinedCardinalityTraits +/** Хэш-функция для uniqCombined. + */ +template +struct CombinedCardinalityTraits +{ + static UInt32 hash(T key) { - static UInt32 hash(T key) - { - return key; - } - }; + return key; + } +}; - template - struct CombinedCardinalityTraits::value || std::is_same::value>::type> +template +struct CombinedCardinalityTraits::value || std::is_same::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - using Op = Hash64To32; - - static UInt32 hash(T key) - { - return Op::compute(key); - }; + return Op::compute(key); }; +}; - template - struct CombinedCardinalityTraits::value>::type> +template +struct CombinedCardinalityTraits::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - using Op = Hash64To32; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } +}; - static UInt32 hash(T key) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return Op::compute(res); - } - }; - - template - struct CombinedCardinalityTraits::value>::type> +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) { - static UInt32 hash(T key) - { - UInt32 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return res; - } - }; + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } +}; - /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. - * Используется для частичной специализации для добавления строк. - */ - template - struct OneAdder +/** Хэш-функция для uniq. + */ +template struct AggregateFunctionUniqTraits +{ + static UInt64 hash(T x) { return x; } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float32 x) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; - template - struct OneAdder +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float64 x) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - /// Имейте ввиду, что вычисление приближённое. - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; - template - struct OneAdder > +/** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. + * Используется для частичной специализации для добавления строк. + */ +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - data.set.insert(static_cast &>(column).getData()[row_num]); - } - }; + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } +}; - template<> - struct OneAdder > +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); + /// Имейте ввиду, что вычисление приближённое. + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; - UInt128 key; - SipHash hash; - hash.update(value.data, value.size); - hash.get128(key.first, key.second); - - data.set.insert(key); - } - }; - - template - struct OneAdder > +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - const auto & value = static_cast &>(column).getData()[row_num]; - data.set.insert(CombinedCardinalityTraits::hash(value)); - } - }; + data.set.insert(static_cast &>(column).getData()[row_num]); + } +}; - template<> - struct OneAdder > +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + StringRef value = column.getDataAt(row_num); + + UInt128 key; + SipHash hash; + hash.update(value.data, value.size); + hash.get128(key.first, key.second); + + data.set.insert(key); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + } From e9c504e9da30523569ba781e4b5ae3f5fbea2756 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Aug 2015 21:55:53 +0300 Subject: [PATCH 20/43] dbms: modified exception message for ParallelReplicas [#MTRSADMIN-1462]. --- dbms/src/Client/ParallelReplicas.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbms/src/Client/ParallelReplicas.cpp b/dbms/src/Client/ParallelReplicas.cpp index 538d4cf2caf..0702482f4c9 100644 --- a/dbms/src/Client/ParallelReplicas.cpp +++ b/dbms/src/Client/ParallelReplicas.cpp @@ -214,7 +214,7 @@ Connection::Packet ParallelReplicas::receivePacketUnlocked() auto it = getReplicaForReading(); if (it == replica_map.end()) - throw Exception("No available replica", ErrorCodes::NO_AVAILABLE_REPLICA); + throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA); Connection * connection = it->second; Connection::Packet packet = connection->receivePacket(); @@ -285,9 +285,17 @@ ParallelReplicas::ReplicaMap::iterator ParallelReplicas::waitForReadEvent() if (connection != nullptr) read_list.push_back(connection->socket); } + int n = Poco::Net::Socket::select(read_list, write_list, except_list, settings->receive_timeout); + if (n == 0) - return replica_map.end(); + { + std::stringstream description; + for (auto it = replica_map.begin(); it != replica_map.end(); ++it) + description << (it != replica_map.begin() ? ", " : "") << it->second->getDescription(); + + throw Exception("Timeout exceeded while reading from " + description.str(), ErrorCodes::TIMEOUT_EXCEEDED); + } } auto & socket = read_list[rand() % read_list.size()]; From 4ab00524564fa94c3f3f991a21e38539ae294396 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 00:38:31 +0300 Subject: [PATCH 21/43] dbms: more compact formatting of queries with aliases: development [#METR-17606]. --- dbms/include/DB/Parsers/ASTAlterQuery.h | 84 ++++++++++ dbms/include/DB/Parsers/ASTAsterisk.h | 6 + dbms/include/DB/Parsers/ASTCheckQuery.h | 22 +++ .../include/DB/Parsers/ASTColumnDeclaration.h | 20 +++ dbms/include/DB/Parsers/ASTCreateQuery.h | 67 ++++++++ dbms/include/DB/Parsers/ASTDropQuery.h | 19 +++ dbms/include/DB/Parsers/ASTExpressionList.h | 34 ++++ dbms/include/DB/Parsers/ASTFunction.h | 3 + dbms/include/DB/Parsers/ASTIdentifier.h | 23 +++ dbms/include/DB/Parsers/ASTInsertQuery.h | 37 +++++ dbms/include/DB/Parsers/ASTJoin.h | 31 ++++ dbms/include/DB/Parsers/ASTLiteral.h | 16 ++ dbms/include/DB/Parsers/ASTNameTypePair.h | 9 ++ dbms/include/DB/Parsers/ASTOptimizeQuery.h | 7 + dbms/include/DB/Parsers/ASTOrderByElement.h | 12 ++ .../DB/Parsers/ASTQueryWithTableAndOutput.h | 22 +-- dbms/include/DB/Parsers/ASTRenameQuery.h | 16 ++ dbms/include/DB/Parsers/ASTSelectQuery.h | 3 + dbms/include/DB/Parsers/IAST.h | 64 ++++++++ dbms/include/DB/Parsers/formatAST.h | 2 +- dbms/src/Parsers/ASTSelectQuery.cpp | 148 ++++++++++++++++++ dbms/src/Parsers/formatAST.cpp | 27 +++- 22 files changed, 657 insertions(+), 15 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 1d22d25e186..9bd8356762c 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -1,6 +1,8 @@ #pragma once #include +#include + namespace DB { @@ -98,5 +100,87 @@ public: } return res; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (settings.hilite ? hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << indent_str << database; + settings.ostr << "."; + } + settings.ostr << indent_str << table; + } + settings.ostr << nl_or_ws; + + for (size_t i = 0; i < parameters.size(); ++i) + { + const ASTAlterQuery::Parameters & p = parameters[i]; + + if (p.type == ASTAlterQuery::ADD_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (settings.hilite ? hilite_none : ""); + p.col_decl->formatImpl(settings, state, frame); + + /// AFTER + if (p.column) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); + p.column->formatImpl(settings, state, frame); + } + } + else if (p.type == ASTAlterQuery::DROP_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "DROP COLUMN " << (settings.hilite ? hilite_none : ""); + p.column->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::MODIFY_COLUMN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (settings.hilite ? hilite_none : ""); + p.col_decl->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::DROP_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << (p.detach ? "DETACH" : "DROP") << " PARTITION " + << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::ATTACH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ATTACH " << (p.unreplicated ? "UNREPLICATED " : "") + << (p.part ? "PART " : "PARTITION ") << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else if (p.type == ASTAlterQuery::FETCH_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FETCH " << (p.unreplicated ? "UNREPLICATED " : "") + << "PARTITION " << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") + << " FROM " << (settings.hilite ? hilite_none : "") << mysqlxx::quote << p.from; + } + else if (p.type == ASTAlterQuery::FREEZE_PARTITION) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (settings.hilite ? hilite_none : ""); + p.partition->formatImpl(settings, state, frame); + } + else + throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + + std::string comma = (i < (parameters.size() -1) ) ? "," : ""; + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << comma << (settings.hilite ? hilite_none : ""); + + settings.ostr << settings.nl_or_ws; + } + } }; + } diff --git a/dbms/include/DB/Parsers/ASTAsterisk.h b/dbms/include/DB/Parsers/ASTAsterisk.h index aa90d676c71..f0741539267 100644 --- a/dbms/include/DB/Parsers/ASTAsterisk.h +++ b/dbms/include/DB/Parsers/ASTAsterisk.h @@ -16,6 +16,12 @@ public: String getID() const override { return "Asterisk"; } ASTPtr clone() const override { return new ASTAsterisk(*this); } String getColumnName() const override { return "*"; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << "*"; + } }; } diff --git a/dbms/include/DB/Parsers/ASTCheckQuery.h b/dbms/include/DB/Parsers/ASTCheckQuery.h index 901ad7ef567..74b64aeaef8 100644 --- a/dbms/include/DB/Parsers/ASTCheckQuery.h +++ b/dbms/include/DB/Parsers/ASTCheckQuery.h @@ -19,6 +19,28 @@ struct ASTCheckQuery : public IAST std::string database; std::string table; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + std::string nl_or_nothing = settings.one_line ? "" : "\n"; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string nl_or_ws = settings.one_line ? " " : "\n"; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (settings.hilite ? hilite_none : ""); + + if (!table.empty()) + { + if (!database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << database << (settings.hilite ? hilite_none : ""); + settings.ostr << "."; + } + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << table << (settings.hilite ? hilite_none : ""); + } + settings.ostr << nl_or_ws; + } }; } diff --git a/dbms/include/DB/Parsers/ASTColumnDeclaration.h b/dbms/include/DB/Parsers/ASTColumnDeclaration.h index 9862c5a81b6..722bc6d8283 100644 --- a/dbms/include/DB/Parsers/ASTColumnDeclaration.h +++ b/dbms/include/DB/Parsers/ASTColumnDeclaration.h @@ -40,6 +40,26 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name); + if (type) + { + settings.ostr << ' '; + type->formatImpl(settings, state, frame); + } + + if (default_expression) + { + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : "") << ' '; + default_expression->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index 33d69a8e8e3..57cfc1a35b2 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -48,6 +48,73 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + if (!database.empty() && table.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << (attach ? "ATTACH DATABASE " : "CREATE DATABASE ") << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(database); + return; + } + + { + std::string what = "TABLE"; + if (is_view) + what = "VIEW"; + if (is_materialized_view) + what = "MATERIALIZED VIEW"; + + settings.ostr + << (settings.hilite ? hilite_keyword : "") + << (attach ? "ATTACH " : "CREATE ") + << (is_temporary ? "TEMPORARY " : "") + << what + << " " << (if_not_exists ? "IF NOT EXISTS " : "") + << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } + + if (!as_table.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? hilite_none : "") + << (!as_database.empty() ? backQuoteIfNeed(as_database) + "." : "") << backQuoteIfNeed(as_table); + } + + if (columns) + { + settings.ostr << (settings.one_line ? " (" : "\n("); + ++frame.indent; + columns->formatImpl(settings, state, frame); + settings.ostr << (settings.one_line ? ")" : "\n)"); + } + + if (storage && !is_materialized_view && !is_view) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ENGINE" << (settings.hilite ? hilite_none : "") << " = "; + storage->formatImpl(settings, state, frame); + } + + if (inner_storage) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ENGINE" << (settings.hilite ? hilite_none : "") << " = "; + inner_storage->formatImpl(settings, state, frame); + } + + if (is_populate) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " POPULATE" << (settings.hilite ? hilite_none : ""); + } + + if (select) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS" << settings.nl_or_ws << (settings.hilite ? hilite_none : ""); + select->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTDropQuery.h b/dbms/include/DB/Parsers/ASTDropQuery.h index c0ac24017d0..897f9afd4c6 100644 --- a/dbms/include/DB/Parsers/ASTDropQuery.h +++ b/dbms/include/DB/Parsers/ASTDropQuery.h @@ -24,6 +24,25 @@ public: String getID() const override { return (detach ? "DetachQuery_" : "DropQuery_") + database + "_" + table; }; ASTPtr clone() const override { return new ASTDropQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (table.empty() && !database.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") + << (detach ? "DETACH DATABASE " : "DROP DATABASE ") + << (if_exists ? "IF EXISTS " : "") + << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(database); + return; + } + + settings.ostr << (settings.hilite ? hilite_keyword : "") + << (detach ? "DETACH TABLE " : "DROP TABLE ") + << (if_exists ? "IF EXISTS " : "") << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } }; } diff --git a/dbms/include/DB/Parsers/ASTExpressionList.h b/dbms/include/DB/Parsers/ASTExpressionList.h index 1ec814a8d1b..1a2fdb19cb7 100644 --- a/dbms/include/DB/Parsers/ASTExpressionList.h +++ b/dbms/include/DB/Parsers/ASTExpressionList.h @@ -31,6 +31,40 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + { + if (it != children.begin()) + settings.ostr << ", "; + + (*it)->formatImpl(settings, state, frame); + } + } + + + friend class ASTSelectQuery; + + /** Вывести список выражений в секциях запроса SELECT - по одному выражению на строку. + */ + void formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const + { + std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + + ++frame.indent; + for (ASTs::const_iterator it = children.begin(); it != children.end(); ++it) + { + if (it != children.begin()) + settings.ostr << ", "; + + if (children.size() > 1) + settings.ostr << indent_str; + + (*it)->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 70380b67e7e..3801a32e638 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -82,6 +82,9 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 58ae38ca434..550973f298f 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -41,6 +42,28 @@ public: { set.insert(name); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + settings.ostr << (settings.hilite ? hilite_identifier : ""); + + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(name, wb); + wb.next(); + + settings.ostr << (settings.hilite ? hilite_none : ""); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTInsertQuery.h b/dbms/include/DB/Parsers/ASTInsertQuery.h index 5e6988bcfc6..c7a1879b55d 100644 --- a/dbms/include/DB/Parsers/ASTInsertQuery.h +++ b/dbms/include/DB/Parsers/ASTInsertQuery.h @@ -42,6 +42,43 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + settings.ostr << (settings.hilite ? hilite_keyword : "") << "INSERT INTO " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + if (!insert_id.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " ID = " << (settings.hilite ? hilite_none : "") + << mysqlxx::quote << insert_id; + + if (columns) + { + settings.ostr << " ("; + columns->formatImpl(settings, state, frame); + settings.ostr << ")"; + } + + if (select) + { + settings.ostr << " "; + select->formatImpl(settings, state, frame); + } + else + { + if (!format.empty()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format; + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : ""); + } + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 96f161c5c60..029d4f49350 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -83,6 +83,37 @@ public: return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + frame.need_parens = false; + + settings.ostr << (settings.hilite ? hilite_keyword : ""); + + if (locality == ASTJoin::Global) + settings.ostr << "GLOBAL "; + + if (kind != ASTJoin::Cross) + settings.ostr << (strictness == ASTJoin::Any ? "ANY " : "ALL "); + + settings.ostr << (kind == ASTJoin::Inner ? "INNER " + : (kind == ASTJoin::Left ? "LEFT " + : (kind == ASTJoin::Right ? "RIGHT " + : (kind == ASTJoin::Cross ? "CROSS " + : "FULL OUTER ")))); + + settings.ostr << "JOIN " + << (settings.hilite ? hilite_none : ""); + + table->formatImpl(settings, state, frame); + + if (kind != ASTJoin::Cross) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " USING " << (settings.hilite ? hilite_none : ""); + using_expr_list->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index 2a610255be8..c5f1ec91ed4 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -26,6 +26,22 @@ public: String getID() const override { return "Literal_" + apply_visitor(FieldVisitorDump(), value); } ASTPtr clone() const override { return new ASTLiteral(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + settings.ostr <formatImpl(settings, state, frame); + } }; } diff --git a/dbms/include/DB/Parsers/ASTOptimizeQuery.h b/dbms/include/DB/Parsers/ASTOptimizeQuery.h index 906b3d1edb5..2fb6921d2ed 100644 --- a/dbms/include/DB/Parsers/ASTOptimizeQuery.h +++ b/dbms/include/DB/Parsers/ASTOptimizeQuery.h @@ -22,6 +22,13 @@ public: String getID() const override { return "OptimizeQuery_" + database + "_" + table; }; ASTPtr clone() const override { return new ASTOptimizeQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + } }; } diff --git a/dbms/include/DB/Parsers/ASTOrderByElement.h b/dbms/include/DB/Parsers/ASTOrderByElement.h index f341265d93b..d7103a48e64 100644 --- a/dbms/include/DB/Parsers/ASTOrderByElement.h +++ b/dbms/include/DB/Parsers/ASTOrderByElement.h @@ -29,6 +29,18 @@ public: String getID() const override { return "OrderByElement"; } ASTPtr clone() const override { return new ASTOrderByElement(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + children.front()->formatImpl(settings, state, frame); + settings.ostr << (settings.hilite ? hilite_keyword : "") << (direction == -1 ? " DESC" : " ASC") << (settings.hilite ? hilite_none : ""); + if (!collator.isNull()) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " COLLATE " << (settings.hilite ? hilite_none : "") + << "'" << collator->getLocale() << "'"; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 32ebb1e528d..015cb010ead 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -8,17 +8,17 @@ namespace DB { - /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. - */ - class ASTQueryWithTableAndOutput : public ASTQueryWithOutput - { - public: - String database; - String table; - - ASTQueryWithTableAndOutput() = default; - ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} - }; +/** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. + */ +class ASTQueryWithTableAndOutput : public ASTQueryWithOutput +{ +public: + String database; + String table; + + ASTQueryWithTableAndOutput() = default; + ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} +}; /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. diff --git a/dbms/include/DB/Parsers/ASTRenameQuery.h b/dbms/include/DB/Parsers/ASTRenameQuery.h index 4eb6624e4c3..ffb59c3f0f8 100644 --- a/dbms/include/DB/Parsers/ASTRenameQuery.h +++ b/dbms/include/DB/Parsers/ASTRenameQuery.h @@ -34,6 +34,22 @@ public: String getID() const override { return "Rename"; }; ASTPtr clone() const override { return new ASTRenameQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "RENAME TABLE " << (settings.hilite ? hilite_none : ""); + + for (ASTRenameQuery::Elements::const_iterator it = elements.begin(); it != elements.end(); ++it) + { + if (it != elements.begin()) + settings.ostr << ", "; + + settings.ostr << (!it->from.database.empty() ? backQuoteIfNeed(it->from.database) + "." : "") << backQuoteIfNeed(it->from.table) + << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : "") + << (!it->to.database.empty() ? backQuoteIfNeed(it->to.database) + "." : "") << backQuoteIfNeed(it->to.table); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTSelectQuery.h b/dbms/include/DB/Parsers/ASTSelectQuery.h index b941046c534..6839f6616c9 100644 --- a/dbms/include/DB/Parsers/ASTSelectQuery.h +++ b/dbms/include/DB/Parsers/ASTSelectQuery.h @@ -68,6 +68,9 @@ public: ASTPtr prev_union_all; /// Следующий запрос SELECT в цепочке UNION ALL, если такой есть ASTPtr next_union_all; + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; } diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 803cc9eb31f..cb08ef5b408 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -133,6 +133,65 @@ public: (*it)->collectIdentifierNames(set); } + + /// Преобразовать в строку. + + /// Настройки формата. + struct FormatSettings + { + std::ostream & ostr; + bool hilite; + bool one_line; + + char nl_or_ws; + + FormatSettings(std::ostream & ostr_, bool hilite_, bool one_line_) + : ostr(ostr_), hilite(hilite_), one_line(one_line_) + { + nl_or_ws = one_line ? ' ' : '\n'; + } + }; + + /// Состояние. Например, множество узлов DAG, которых мы уже обошли. + struct FormatState + { + /// TODO + }; + + /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. + struct FormatStateStacked + { + bool indent = 0; + bool need_parens = false; + }; + + void format(const FormatSettings & settings) const + { + FormatState state; + formatImpl(settings, state, FormatStateStacked()); + } + +protected: + /// Для подсветки синтаксиса. + static const char * hilite_keyword; + static const char * hilite_identifier; + static const char * hilite_function; + static const char * hilite_operator; + static const char * hilite_alias; + static const char * hilite_none; + + + virtual void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const + { + throw Exception("Unknown element in AST: " + getID() + + ((range.first && (range.second > range.first)) + ? " '" + std::string(range.first, range.second - range.first) + "'" + : ""), + ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + } + + void writeAlias(const String & name, std::ostream & s, bool hilite); + private: size_t checkDepthImpl(size_t max_depth, size_t level) const { @@ -152,4 +211,9 @@ private: typedef SharedPtr ASTPtr; typedef std::vector ASTs; + +/// Квотировать идентификатор обратными кавычками, если это требуется. +String backQuoteIfNeed(const String & x); + + } diff --git a/dbms/include/DB/Parsers/formatAST.h b/dbms/include/DB/Parsers/formatAST.h index 811d946f044..3fdfded2aa3 100644 --- a/dbms/include/DB/Parsers/formatAST.h +++ b/dbms/include/DB/Parsers/formatAST.h @@ -12,7 +12,7 @@ namespace DB /** Берёт синтаксическое дерево и превращает его обратно в текст. * В случае запроса INSERT, данные будут отсутствовать. */ -void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); +void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); String formatColumnsForCreateQuery(NamesAndTypesList & columns); diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index ff6f53d5a4e..5da7767dda5 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -1,5 +1,7 @@ +#include #include + namespace DB { @@ -217,5 +219,151 @@ const IAST * ASTSelectQuery::getFormat() const return query->format.get(); } + +void ASTSelectQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override +{ + frame.need_parens = false; + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (settings.hilite ? hilite_none : ""); + + settings.one_line + ? select_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*select_expression_list).formatImplMultiline(settings, state, frame); + + if (table) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FROM " << (settings.hilite ? hilite_none : ""); + if (database) + { + database->formatImpl(settings, state, frame); + settings.ostr << "."; + } + + if (typeid_cast(&*table)) + { + if (settings.one_line) + settings.ostr << " ("; + else + settings.ostr << "\n" << indent_str << "(\n"; + + table->formatImpl(settings, state, frame); + + if (settings.one_line) + settings.ostr << ")"; + else + settings.ostr << "\n" << indent_str << ")"; + } + else + table->formatImpl(settings, state, frame); + } + + if (final) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FINAL" << (settings.hilite ? hilite_none : ""); + } + + if (sample_size) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SAMPLE " << (settings.hilite ? hilite_none : ""); + sample_size->formatImpl(settings, state, frame); + } + + if (array_join_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str + << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (settings.hilite ? hilite_none : ""); + + settings.one_line + ? array_join_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*array_join_expression_list).formatImplMultiline(settings, state, frame); + } + + if (join) + { + settings.ostr << " "; + join->formatImpl(settings, state, frame); + } + + if (prewhere_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "PREWHERE " << (settings.hilite ? hilite_none : ""); + prewhere_expression->formatImpl(settings, state, frame); + } + + if (where_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "WHERE " << (settings.hilite ? hilite_none : ""); + where_expression, s, indent, hilite, settings.one_line); + } + + if (group_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "GROUP BY " << (settings.hilite ? hilite_none : ""); + settings.one_line + ? group_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*group_expression_list).formatImplMultiline(settings, state, frame); + } + + if (group_by_with_totals) + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << (settings.one_line ? "" : " ") << "WITH TOTALS" << (settings.hilite ? hilite_none : ""); + + if (having_expression) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "HAVING " << (settings.hilite ? hilite_none : ""); + having_expression->formatImpl(settings, state, frame); + } + + if (order_expression_list) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "ORDER BY " << (settings.hilite ? hilite_none : ""); + settings.one_line + ? order_expression_list->formatImpl(settings, state, frame) + : typeid_cast(*order_expression_list).formatImplMultiline(settings, state, frame); + } + + if (limit_length) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "LIMIT " << (settings.hilite ? hilite_none : ""); + if (limit_offset) + { + limit_offset->formatImpl(settings, state, frame); + settings.ostr << ", "; + } + limit_length->formatImpl(settings, state, frame); + } + + if (settings) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SETTINGS " << (settings.hilite ? hilite_none : ""); + + const ASTSetQuery & ast_set = typeid_cast(*settings); + for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) + { + if (it != ast_set.changes.begin()) + settings.ostr << ", "; + + settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + } + } + + if (format) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + + if (next_union_all) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "UNION ALL " << settings.nl_or_ws << settings.ostr << (settings.hilite ? hilite_none : ""); + + // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL + // имеются только деревья типа SELECT. + const ASTSelectQuery & next_ast = static_cast(*next_union_all); + + next_ast->formatImpl(settings, state, frame); + } +} + }; diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 5d0854827ef..4e0d38ea6b8 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -63,7 +63,7 @@ String backQuoteIfNeed(const String & x) } -static String hightlight(const String & keyword, const String & color_sequence, const bool hilite) +static String highlight(const String & keyword, const String & color_sequence, const bool hilite) { return hilite ? color_sequence + keyword + hilite_none : keyword; } @@ -81,6 +81,21 @@ static void writeAlias(const String & name, std::ostream & s, bool hilite, bool } +struct FormatState +{ + std::ostream & s; + bool hilite; + bool one_line; + + void formatImpl(const IAST & ast, size_t indent, bool need_parens); + + +}; + + + + + void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) @@ -729,7 +744,7 @@ void formatAST(const ASTColumnDeclaration & ast, std::ostream & s, size_t indent if (ast.default_expression) { - s << ' ' << hightlight(ast.default_specifier, hilite_keyword, hilite) << ' '; + s << ' ' << highlight(ast.default_specifier, hilite_keyword, hilite) << ' '; formatAST(*ast.default_expression, s, indent, hilite, one_line); } } @@ -908,10 +923,16 @@ void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent, bool void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) { + FormatState state = { .s = s, .hilite = hilite, .one_line = one_line }; + state.formatImpl(ast, indent, need_parens); +} + +void FormatState::formatImpl(const IAST & ast, size_t indent, bool need_parens) +{ #define DISPATCH(NAME) \ else if (const AST ## NAME * concrete = typeid_cast(&ast)) \ - formatAST(*concrete, s, indent, hilite, one_line, need_parens); + state.formatImpl(*concrete, indent, need_parens); if (false) {} DISPATCH(SelectQuery) From 4eac02304382ab9b3ad7692b9a05cc2ecf58677e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 00:39:42 +0300 Subject: [PATCH 22/43] dbms: addition to prev. revision [#METR-17606]. --- dbms/src/Parsers/ASTFunction.cpp | 197 +++++++++++++++++++++++++++++++ dbms/src/Parsers/IAST.cpp | 39 ++++++ 2 files changed, 236 insertions(+) create mode 100644 dbms/src/Parsers/ASTFunction.cpp create mode 100644 dbms/src/Parsers/IAST.cpp diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp new file mode 100644 index 00000000000..fe85ba99bbb --- /dev/null +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -0,0 +1,197 @@ +#include +#include + + +namespace DB +{ + void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + FormatStateStacked nested_need_parens; + FormatStateStacked nested_dont_need_parens; + nested_need_parens.need_parens = true; + nested_dont_need_parens.need_parens = false; + + /// Стоит ли записать эту функцию в виде оператора? + bool written = false; + if (arguments && !parameters) + { + if (arguments->children.size() == 1) + { + const char * operators[] = + { + "negate", "-", + "not", "NOT ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + + /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: + * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. + * Вместо этого, добавим пробел. + * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. + */ + if (name == "negate" && typeid_cast(&*arguments->children[0])) + settings.ostr << ' '; + + FormatStateStacked nested_frame; + nested_frame.need_parens = true; + arguments->formatImpl(settings, state, nested_need_parens); + written = true; + } + } + } + + /** need_parens - нужны ли скобки вокруг выражения с оператором. + * Они нужны, только если это выражение входит в другое выражение с оператором. + */ + + if (!written && arguments->children.size() == 2) + { + const char * operators[] = + { + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "like", " LIKE ", + "notLike", " NOT LIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } + } + + if (!written && 0 == strcmp(name.c_str(), "arrayElement")) + { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && 0 == strcmp(name.c_str(), "tupleElement")) + { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); + written = true; + } + } + + if (!written && arguments->children.size() >= 2) + { + const char * operators[] = + { + "and", " AND ", + "or", " OR ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[i]->formatImpl(settings, state, nested_need_parens); + } + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } + } + } + + if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + written = true; + } + } + + if (!written) + { + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (parameters) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + parameters->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + if (arguments) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + arguments->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); + } + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } +} diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp new file mode 100644 index 00000000000..c8036519eae --- /dev/null +++ b/dbms/src/Parsers/IAST.cpp @@ -0,0 +1,39 @@ +#include +#include + + +namespace DB +{ + +const char * IAST::hilite_keyword = "\033[1m"; +const char * IAST::hilite_identifier = "\033[0;36m"; +const char * IAST::hilite_function = "\033[0;33m"; +const char * IAST::hilite_operator = "\033[1;33m"; +const char * IAST::hilite_alias = "\033[0;32m"; +const char * IAST::hilite_none = "\033[0m"; + + +/// Квотировать идентификатор обратными кавычками, если это требуется. +String backQuoteIfNeed(const String & x) +{ + String res(x.size(), '\0'); + { + WriteBufferFromString wb(res); + writeProbablyBackQuotedString(x, wb); + } + return res; +} + + +void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) +{ + s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); + + WriteBufferFromOStream wb(s, 32); + writeProbablyBackQuotedString(name, wb); + wb.next(); + + s << (hilite ? hilite_none : ""); +} + +} From e072db7da23347730c00f3beabb461011e82da90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 06:26:27 +0300 Subject: [PATCH 23/43] dbms: preparation to more compact query formatting [#METR-17606]. --- dbms/include/DB/Parsers/ASTAlterQuery.h | 2 +- dbms/include/DB/Parsers/ASTNameTypePair.h | 2 +- dbms/include/DB/Parsers/ASTQueryWithOutput.h | 8 +- .../DB/Parsers/ASTQueryWithTableAndOutput.h | 32 +- dbms/include/DB/Parsers/ASTSet.h | 11 + dbms/include/DB/Parsers/ASTSetQuery.h | 16 +- .../DB/Parsers/ASTShowProcesslistQuery.h | 4 +- dbms/include/DB/Parsers/ASTShowTablesQuery.h | 35 +- dbms/include/DB/Parsers/ASTSubquery.h | 24 + dbms/include/DB/Parsers/ASTUseQuery.h | 9 +- dbms/include/DB/Parsers/IAST.h | 4 +- .../DB/Parsers/TablePropertiesQueriesASTs.h | 18 +- dbms/include/DB/Parsers/formatAST.h | 7 +- dbms/src/Parsers/ASTFunction.cpp | 304 +++--- dbms/src/Parsers/ASTSelectQuery.cpp | 106 +- dbms/src/Parsers/IAST.cpp | 2 +- dbms/src/Parsers/formatAST.cpp | 969 ------------------ 17 files changed, 347 insertions(+), 1206 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTAlterQuery.h b/dbms/include/DB/Parsers/ASTAlterQuery.h index 9bd8356762c..d0e88080071 100644 --- a/dbms/include/DB/Parsers/ASTAlterQuery.h +++ b/dbms/include/DB/Parsers/ASTAlterQuery.h @@ -119,7 +119,7 @@ protected: } settings.ostr << indent_str << table; } - settings.ostr << nl_or_ws; + settings.ostr << settings.nl_or_ws; for (size_t i = 0; i < parameters.size(); ++i) { diff --git a/dbms/include/DB/Parsers/ASTNameTypePair.h b/dbms/include/DB/Parsers/ASTNameTypePair.h index e2c392a08ff..4768c7b67a2 100644 --- a/dbms/include/DB/Parsers/ASTNameTypePair.h +++ b/dbms/include/DB/Parsers/ASTNameTypePair.h @@ -40,7 +40,7 @@ protected: { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - s << settings.nl_or_ws << indent_str << backQuoteIfNeed(name) << " "; + settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name) << " "; type->formatImpl(settings, state, frame); } }; diff --git a/dbms/include/DB/Parsers/ASTQueryWithOutput.h b/dbms/include/DB/Parsers/ASTQueryWithOutput.h index 92cba621eab..3a0bb4eac0f 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithOutput.h @@ -24,7 +24,7 @@ public: /// Объявляет класс-наследник ASTQueryWithOutput с реализованными методами getID и clone. -#define DEFINE_AST_QUERY_WITH_OUTPUT(Name, ID) \ +#define DEFINE_AST_QUERY_WITH_OUTPUT(Name, ID, Query) \ class Name : public ASTQueryWithOutput \ { \ public: \ @@ -44,6 +44,12 @@ public: \ } \ return ptr; \ } \ +\ +protected: \ + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override \ + { \ + settings.ostr << (settings.hilite ? hilite_keyword : "") << Query << (settings.hilite ? hilite_none : ""); \ + } \ }; } diff --git a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h index 015cb010ead..d99025593d0 100644 --- a/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h +++ b/dbms/include/DB/Parsers/ASTQueryWithTableAndOutput.h @@ -6,8 +6,8 @@ namespace DB { - - + + /** Запрос с указанием названия таблицы и, возможно, БД и секцией FORMAT. */ class ASTQueryWithTableAndOutput : public ASTQueryWithOutput @@ -18,14 +18,28 @@ public: ASTQueryWithTableAndOutput() = default; ASTQueryWithTableAndOutput(const StringRange range_) : ASTQueryWithOutput(range_) {} + +protected: + void formatHelper(const FormatSettings & settings, FormatState & state, FormatStateStacked frame, const char * name) const + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << name << " " << (settings.hilite ? hilite_none : "") + << (!database.empty() ? backQuoteIfNeed(database) + "." : "") << backQuoteIfNeed(table); + + if (format) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + } }; - - + + /// Объявляет класс-наследник ASTQueryWithTableAndOutput с реализованными методами getID и clone. -#define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID) \ +#define DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(Name, ID, Query) \ class Name : public ASTQueryWithTableAndOutput \ { \ -public: \ + public: \ Name() = default; \ Name(const StringRange range_) : ASTQueryWithTableAndOutput(range_) {} \ String getID() const override { return ID"_" + database + "_" + table; }; \ @@ -42,5 +56,11 @@ public: \ } \ return ptr; \ } \ + \ + protected: \ + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override \ + { \ + formatHelper(settings, state, frame, Query); \ + } \ }; } diff --git a/dbms/include/DB/Parsers/ASTSet.h b/dbms/include/DB/Parsers/ASTSet.h index 18edddc999f..d3af0b5f30f 100644 --- a/dbms/include/DB/Parsers/ASTSet.h +++ b/dbms/include/DB/Parsers/ASTSet.h @@ -22,6 +22,17 @@ public: String getID() const override { return "Set_" + getColumnName(); } ASTPtr clone() const override { return new ASTSet(*this); } String getColumnName() const override { return column_name; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /** Подготовленное множество. В пользовательских запросах такого не бывает, но такое бывает после промежуточных преобразований запроса. + * Выведем его не по-настоящему (это не будет корректным запросом, но покажет, что здесь было множество). + */ + settings.ostr << (settings.hilite ? hilite_keyword : "") + << "(...)" + << (settings.hilite ? hilite_none : ""); + } }; } diff --git a/dbms/include/DB/Parsers/ASTSetQuery.h b/dbms/include/DB/Parsers/ASTSetQuery.h index 0334d1167bb..471c76d2855 100644 --- a/dbms/include/DB/Parsers/ASTSetQuery.h +++ b/dbms/include/DB/Parsers/ASTSetQuery.h @@ -26,11 +26,25 @@ public: ASTSetQuery() = default; ASTSetQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "Set"; }; ASTPtr clone() const override { return new ASTSetQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SET " << (global ? "GLOBAL " : "") << (settings.hilite ? hilite_none : ""); + + for (ASTSetQuery::Changes::const_iterator it = changes.begin(); it != changes.end(); ++it) + { + if (it != changes.begin()) + settings.ostr << ", "; + + settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h b/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h index 8d06950319e..c1bd4f35eb3 100644 --- a/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h +++ b/dbms/include/DB/Parsers/ASTShowProcesslistQuery.h @@ -5,5 +5,7 @@ namespace DB { - DEFINE_AST_QUERY_WITH_OUTPUT(ASTShowProcesslistQuery, "ShowProcesslistQuery") + +DEFINE_AST_QUERY_WITH_OUTPUT(ASTShowProcesslistQuery, "ShowProcesslistQuery", "SHOW PROCESSLIST") + } diff --git a/dbms/include/DB/Parsers/ASTShowTablesQuery.h b/dbms/include/DB/Parsers/ASTShowTablesQuery.h index 4b51b8f1aba..ccd40bb164e 100644 --- a/dbms/include/DB/Parsers/ASTShowTablesQuery.h +++ b/dbms/include/DB/Parsers/ASTShowTablesQuery.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -20,7 +21,7 @@ public: ASTShowTablesQuery() = default; ASTShowTablesQuery(const StringRange range_) : ASTQueryWithOutput(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "ShowTables"; }; @@ -30,15 +31,43 @@ public: ASTPtr ptr{res}; res->children.clear(); - + if (format) { res->format = format->clone(); res->children.push_back(res->format); } - + return ptr; } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + if (databases) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW DATABASES" << (settings.hilite ? hilite_none : ""); + } + else + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "SHOW TABLES" << (settings.hilite ? hilite_none : ""); + + if (!from.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : "") + << backQuoteIfNeed(from); + + if (!like.empty()) + settings.ostr << (settings.hilite ? hilite_keyword : "") << " LIKE " << (settings.hilite ? hilite_none : "") + << mysqlxx::quote << like; + } + + if (format) + { + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); + format->formatImpl(settings, state, frame); + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 8dac88b26d3..461129a0ae6 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -34,6 +34,30 @@ public: } String getColumnName() const override { return getTreeID(); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string nl_or_nothing = settings.one_line ? "" : "\n"; + + settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; + FormatStateStacked frame_dont_need_parens = frame; + frame_dont_need_parens.need_parens = false; + children[0]->formatImpl(settings, state, frame_dont_need_parens); + settings.ostr << nl_or_nothing << indent_str << ")"; + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } + } }; } diff --git a/dbms/include/DB/Parsers/ASTUseQuery.h b/dbms/include/DB/Parsers/ASTUseQuery.h index eafe3496293..a1e354b39b0 100644 --- a/dbms/include/DB/Parsers/ASTUseQuery.h +++ b/dbms/include/DB/Parsers/ASTUseQuery.h @@ -16,11 +16,18 @@ public: ASTUseQuery() = default; ASTUseQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return "UseQuery_" + database; }; ASTPtr clone() const override { return new ASTUseQuery(*this); } + +protected: + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? hilite_none : "") << backQuoteIfNeed(database); + return; + } }; } diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index cb08ef5b408..4ee619d0c9f 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -171,7 +171,7 @@ public: formatImpl(settings, state, FormatStateStacked()); } -protected: + /// Для подсветки синтаксиса. static const char * hilite_keyword; static const char * hilite_identifier; @@ -190,7 +190,7 @@ protected: ErrorCodes::UNKNOWN_ELEMENT_IN_AST); } - void writeAlias(const String & name, std::ostream & s, bool hilite); + void writeAlias(const String & name, std::ostream & s, bool hilite) const; private: size_t checkDepthImpl(size_t max_depth, size_t level) const diff --git a/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h b/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h index eb06c893167..793ee655b7d 100644 --- a/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h +++ b/dbms/include/DB/Parsers/TablePropertiesQueriesASTs.h @@ -5,17 +5,9 @@ namespace DB { - - /** EXISTS запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTExistsQuery, "ExistsQuery") - - /** SHOW CREATE TABLE запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTShowCreateQuery, "ShowCreateQuery") - - /** DESCRIBE TABLE запрос - */ - DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTDescribeQuery, "DescribeQuery") - + +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTExistsQuery, "ExistsQuery", "EXISTS TABLE") +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTShowCreateQuery, "ShowCreateQuery", "SHOW CREATE TABLE") +DEFINE_AST_QUERY_WITH_TABLE_AND_OUTPUT(ASTDescribeQuery, "DescribeQuery", "DESCRIBE TABLE") + } diff --git a/dbms/include/DB/Parsers/formatAST.h b/dbms/include/DB/Parsers/formatAST.h index 3fdfded2aa3..d94602e9d66 100644 --- a/dbms/include/DB/Parsers/formatAST.h +++ b/dbms/include/DB/Parsers/formatAST.h @@ -12,11 +12,14 @@ namespace DB /** Берёт синтаксическое дерево и превращает его обратно в текст. * В случае запроса INSERT, данные будут отсутствовать. */ -void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false, bool need_parens = false); +inline void formatAST(const IAST & ast, std::ostream & s, size_t indent = 0, bool hilite = true, bool one_line = false) +{ + IAST::FormatSettings settings(s, hilite, one_line); + ast.format(settings); +} String formatColumnsForCreateQuery(NamesAndTypesList & columns); -String backQuoteIfNeed(const String & x); inline std::ostream & operator<<(std::ostream & os, const IAST & ast) { return formatAST(ast, os, 0, false, true), os; } inline std::ostream & operator<<(std::ostream & os, const ASTPtr & ast) { return formatAST(*ast, os, 0, false, true), os; } diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index fe85ba99bbb..1791d837970 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -4,194 +4,196 @@ namespace DB { - void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + +void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr << '('; + + FormatStateStacked nested_need_parens = frame; + FormatStateStacked nested_dont_need_parens = frame; + nested_need_parens.need_parens = true; + nested_dont_need_parens.need_parens = false; + + /// Стоит ли записать эту функцию в виде оператора? + bool written = false; + if (arguments && !parameters) { - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - - FormatStateStacked nested_need_parens; - FormatStateStacked nested_dont_need_parens; - nested_need_parens.need_parens = true; - nested_dont_need_parens.need_parens = false; - - /// Стоит ли записать эту функцию в виде оператора? - bool written = false; - if (arguments && !parameters) + if (arguments->children.size() == 1) { - if (arguments->children.size() == 1) + const char * operators[] = { - const char * operators[] = - { - "negate", "-", - "not", "NOT ", - nullptr - }; + "negate", "-", + "not", "NOT ", + nullptr + }; - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - - /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: - * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. - * Вместо этого, добавим пробел. - * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. - */ - if (name == "negate" && typeid_cast(&*arguments->children[0])) - settings.ostr << ' '; - - FormatStateStacked nested_frame; - nested_frame.need_parens = true; - arguments->formatImpl(settings, state, nested_need_parens); - written = true; - } - } - } - - /** need_parens - нужны ли скобки вокруг выражения с оператором. - * Они нужны, только если это выражение входит в другое выражение с оператором. - */ - - if (!written && arguments->children.size() == 2) + for (const char ** func = operators; *func; func += 2) { - const char * operators[] = + if (0 == strcmp(name.c_str(), func[0])) { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "notLike", " NOT LIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", - nullptr - }; + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - if (frame.need_parens) - settings.ostr << '('; - arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); - if (frame.need_parens) - settings.ostr << ')'; - written = true; - } - } + /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: + * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. + * Вместо этого, добавим пробел. + * PS. Нельзя просто попросить добавить скобки - см. formatImpl для ASTLiteral. + */ + if (name == "negate" && typeid_cast(&*arguments->children[0])) + settings.ostr << ' '; - if (!written && 0 == strcmp(name.c_str(), "arrayElement")) - { - arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - arguments->children[1]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + FormatStateStacked nested_frame; + nested_frame.need_parens = true; + arguments->formatImpl(settings, state, nested_need_parens); written = true; } + } + } - if (!written && 0 == strcmp(name.c_str(), "tupleElement")) + /** need_parens - нужны ли скобки вокруг выражения с оператором. + * Они нужны, только если это выражение входит в другое выражение с оператором. + */ + + if (!written && arguments->children.size() == 2) + { + const char * operators[] = + { + "multiply", " * ", + "divide", " / ", + "modulo", " % ", + "plus", " + ", + "minus", " - ", + "notEquals", " != ", + "lessOrEquals", " <= ", + "greaterOrEquals", " >= ", + "less", " < ", + "greater", " > ", + "equals", " = ", + "like", " LIKE ", + "notLike", " NOT LIKE ", + "in", " IN ", + "notIn", " NOT IN ", + "globalIn", " GLOBAL IN ", + "globalNotIn", " GLOBAL NOT IN ", + nullptr + }; + + for (const char ** func = operators; *func; func += 2) + { + if (0 == strcmp(name.c_str(), func[0])) { + if (frame.need_parens) + settings.ostr << '('; arguments->children[0]->formatImpl(settings, state, nested_need_parens); - settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); arguments->children[1]->formatImpl(settings, state, nested_need_parens); + if (frame.need_parens) + settings.ostr << ')'; written = true; } } - if (!written && arguments->children.size() >= 2) - { - const char * operators[] = - { - "and", " AND ", - "or", " OR ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(name.c_str(), func[0])) - { - if (frame.need_parens) - settings.ostr << '('; - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); - arguments->children[i]->formatImpl(settings, state, nested_need_parens); - } - if (frame.need_parens) - settings.ostr << ')'; - written = true; - } - } - } - - if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) + if (!written && 0 == strcmp(name.c_str(), "arrayElement")) { + arguments->children[0]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << ", "; - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); - } + arguments->children[1]->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); written = true; } - if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + if (!written && 0 == strcmp(name.c_str(), "tupleElement")) { - settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); - for (size_t i = 0; i < arguments->children.size(); ++i) - { - if (i != 0) - settings.ostr << ", "; - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); - } - settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + arguments->children[0]->formatImpl(settings, state, nested_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << "." << (settings.hilite ? hilite_none : ""); + arguments->children[1]->formatImpl(settings, state, nested_need_parens); written = true; } } - if (!written) + if (!written && arguments->children.size() >= 2) { - settings.ostr << (settings.hilite ? hilite_function : "") << name; - - if (parameters) + const char * operators[] = { - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - parameters->formatImpl(settings, state, nested_dont_need_parens); - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; - } + "and", " AND ", + "or", " OR ", + nullptr + }; - if (arguments) + for (const char ** func = operators; *func; func += 2) { - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - arguments->formatImpl(settings, state, nested_dont_need_parens); - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + if (0 == strcmp(name.c_str(), func[0])) + { + if (frame.need_parens) + settings.ostr << '('; + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + arguments->children[i]->formatImpl(settings, state, nested_need_parens); + } + if (frame.need_parens) + settings.ostr << ')'; + written = true; + } } - - settings.ostr << (settings.hilite ? hilite_none : ""); } - if (!alias.empty()) + if (!written && arguments->children.size() >= 1 && 0 == strcmp(name.c_str(), "array")) { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; + settings.ostr << (settings.hilite ? hilite_operator : "") << '[' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ']' << (settings.hilite ? hilite_none : ""); + written = true; + } + + if (!written && arguments->children.size() >= 2 && 0 == strcmp(name.c_str(), "tuple")) + { + settings.ostr << (settings.hilite ? hilite_operator : "") << '(' << (settings.hilite ? hilite_none : ""); + for (size_t i = 0; i < arguments->children.size(); ++i) + { + if (i != 0) + settings.ostr << ", "; + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + } + settings.ostr << (settings.hilite ? hilite_operator : "") << ')' << (settings.hilite ? hilite_none : ""); + written = true; } } + + if (!written) + { + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (parameters) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + parameters->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + if (arguments) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + arguments->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + settings.ostr << (settings.hilite ? hilite_none : ""); + } + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr << ')'; + } +} + } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 5da7767dda5..76064357b2f 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -220,148 +220,148 @@ const IAST * ASTSelectQuery::getFormat() const } -void ASTSelectQuery::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override +void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << indent_str << "SELECT " << (distinct ? "DISTINCT " : "") << (s.hilite ? hilite_none : ""); - settings.one_line - ? select_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*select_expression_list).formatImplMultiline(settings, state, frame); + s.one_line + ? select_expression_list->formatImpl(s, state, frame) + : typeid_cast(*select_expression_list).formatImplMultiline(s, state, frame); if (table) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FROM " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FROM " << (s.hilite ? hilite_none : ""); if (database) { - database->formatImpl(settings, state, frame); - settings.ostr << "."; + database->formatImpl(s, state, frame); + s.ostr << "."; } if (typeid_cast(&*table)) { - if (settings.one_line) - settings.ostr << " ("; + if (s.one_line) + s.ostr << " ("; else - settings.ostr << "\n" << indent_str << "(\n"; + s.ostr << "\n" << indent_str << "(\n"; - table->formatImpl(settings, state, frame); + table->formatImpl(s, state, frame); - if (settings.one_line) - settings.ostr << ")"; + if (s.one_line) + s.ostr << ")"; else - settings.ostr << "\n" << indent_str << ")"; + s.ostr << "\n" << indent_str << ")"; } else - table->formatImpl(settings, state, frame); + table->formatImpl(s, state, frame); } if (final) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FINAL" << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FINAL" << (s.hilite ? hilite_none : ""); } if (sample_size) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SAMPLE " << (settings.hilite ? hilite_none : ""); - sample_size->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SAMPLE " << (s.hilite ? hilite_none : ""); + sample_size->formatImpl(s, state, frame); } if (array_join_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str - << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str + << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (s.hilite ? hilite_none : ""); - settings.one_line - ? array_join_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*array_join_expression_list).formatImplMultiline(settings, state, frame); + s.one_line + ? array_join_expression_list->formatImpl(s, state, frame) + : typeid_cast(*array_join_expression_list).formatImplMultiline(s, state, frame); } if (join) { - settings.ostr << " "; - join->formatImpl(settings, state, frame); + s.ostr << " "; + join->formatImpl(s, state, frame); } if (prewhere_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "PREWHERE " << (settings.hilite ? hilite_none : ""); - prewhere_expression->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "PREWHERE " << (s.hilite ? hilite_none : ""); + prewhere_expression->formatImpl(s, state, frame); } if (where_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "WHERE " << (settings.hilite ? hilite_none : ""); - where_expression, s, indent, hilite, settings.one_line); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "WHERE " << (s.hilite ? hilite_none : ""); + where_expression->formatImpl(s, state, frame); } if (group_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "GROUP BY " << (settings.hilite ? hilite_none : ""); - settings.one_line - ? group_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*group_expression_list).formatImplMultiline(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY " << (s.hilite ? hilite_none : ""); + s.one_line + ? group_expression_list->formatImpl(s, state, frame) + : typeid_cast(*group_expression_list).formatImplMultiline(s, state, frame); } if (group_by_with_totals) - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << (settings.one_line ? "" : " ") << "WITH TOTALS" << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH TOTALS" << (s.hilite ? hilite_none : ""); if (having_expression) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "HAVING " << (settings.hilite ? hilite_none : ""); - having_expression->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "HAVING " << (s.hilite ? hilite_none : ""); + having_expression->formatImpl(s, state, frame); } if (order_expression_list) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "ORDER BY " << (settings.hilite ? hilite_none : ""); - settings.one_line - ? order_expression_list->formatImpl(settings, state, frame) - : typeid_cast(*order_expression_list).formatImplMultiline(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); + s.one_line + ? order_expression_list->formatImpl(s, state, frame) + : typeid_cast(*order_expression_list).formatImplMultiline(s, state, frame); } if (limit_length) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "LIMIT " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "LIMIT " << (s.hilite ? hilite_none : ""); if (limit_offset) { - limit_offset->formatImpl(settings, state, frame); - settings.ostr << ", "; + limit_offset->formatImpl(s, state, frame); + s.ostr << ", "; } - limit_length->formatImpl(settings, state, frame); + limit_length->formatImpl(s, state, frame); } if (settings) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "SETTINGS " << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SETTINGS " << (s.hilite ? hilite_none : ""); const ASTSetQuery & ast_set = typeid_cast(*settings); for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) { if (it != ast_set.changes.begin()) - settings.ostr << ", "; + s.ostr << ", "; - settings.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); + s.ostr << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); } } if (format) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "FORMAT " << (settings.hilite ? hilite_none : ""); - format->formatImpl(settings, state, frame); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FORMAT " << (s.hilite ? hilite_none : ""); + format->formatImpl(s, state, frame); } if (next_union_all) { - settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << settings.ostr << indent_str << "UNION ALL " << settings.nl_or_ws << settings.ostr << (settings.hilite ? hilite_none : ""); + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "UNION ALL " << s.nl_or_ws << (s.hilite ? hilite_none : ""); // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL // имеются только деревья типа SELECT. const ASTSelectQuery & next_ast = static_cast(*next_union_all); - next_ast->formatImpl(settings, state, frame); + next_ast.formatImpl(s, state, frame); } } diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index c8036519eae..2c6f0b8a3e1 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -25,7 +25,7 @@ String backQuoteIfNeed(const String & x) } -void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) +void IAST::writeAlias(const String & name, std::ostream & s, bool hilite) const { s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); diff --git a/dbms/src/Parsers/formatAST.cpp b/dbms/src/Parsers/formatAST.cpp index 4e0d38ea6b8..7966b0db491 100644 --- a/dbms/src/Parsers/formatAST.cpp +++ b/dbms/src/Parsers/formatAST.cpp @@ -1,978 +1,9 @@ -#include - -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include - #include namespace DB { - -static const char * hilite_keyword = "\033[1m"; -static const char * hilite_identifier = "\033[0;36m"; -static const char * hilite_function = "\033[0;33m"; -static const char * hilite_operator = "\033[1;33m"; -static const char * hilite_alias = "\033[0;32m"; -static const char * hilite_none = "\033[0m"; - - -/// Квотировать идентификатор обратными кавычками, если это требуется. -String backQuoteIfNeed(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeProbablyBackQuotedString(x, wb); - } - return res; -} - - -static String highlight(const String & keyword, const String & color_sequence, const bool hilite) -{ - return hilite ? color_sequence + keyword + hilite_none : keyword; -} - - -static void writeAlias(const String & name, std::ostream & s, bool hilite, bool one_line) -{ - s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_alias : ""); - - WriteBufferFromOStream wb(s, 32); - writeProbablyBackQuotedString(name, wb); - wb.next(); - - s << (hilite ? hilite_none : ""); -} - - -struct FormatState -{ - std::ostream & s; - bool hilite; - bool one_line; - - void formatImpl(const IAST & ast, size_t indent, bool need_parens); - - -}; - - - - - -void formatAST(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) - { - if (it != ast.children.begin()) - s << ", "; - - formatAST(**it, s, indent, hilite, one_line, need_parens); - } -} - -/** Вывести список выражений в секциях запроса SELECT - по одному выражению на строку. - */ -static void formatExpressionListMultiline(const ASTExpressionList & ast, std::ostream & s, size_t indent, bool hilite) -{ - std::string indent_str = "\n" + std::string(4 * (indent + 1), ' '); - - for (ASTs::const_iterator it = ast.children.begin(); it != ast.children.end(); ++it) - { - if (it != ast.children.begin()) - s << ", "; - - if (ast.children.size() > 1) - s << indent_str; - - formatAST(**it, s, indent + 1, hilite, false); - } -} - - -void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "SELECT " << (ast.distinct ? "DISTINCT " : "") << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.select_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.select_expression_list), s, indent, hilite); - - if (ast.table) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FROM " << (hilite ? hilite_none : ""); - if (ast.database) - { - formatAST(*ast.database, s, indent, hilite, one_line); - s << "."; - } - - if (typeid_cast(&*ast.table)) - { - if (one_line) - s << " ("; - else - s << "\n" << indent_str << "(\n"; - - formatAST(*ast.table, s, indent + 1, hilite, one_line); - - if (one_line) - s << ")"; - else - s << "\n" << indent_str << ")"; - } - else - formatAST(*ast.table, s, indent, hilite, one_line); - } - - if (ast.final) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FINAL" << (hilite ? hilite_none : ""); - } - - if (ast.sample_size) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "SAMPLE " << (hilite ? hilite_none : ""); - formatAST(*ast.sample_size, s, indent, hilite, one_line); - } - - if (ast.array_join_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str - << (ast.array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (hilite ? hilite_none : ""); - - one_line - ? formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.array_join_expression_list), s, indent, hilite); - } - - if (ast.join) - { - s << " "; - formatAST(*ast.join, s, indent, hilite, one_line); - } - - if (ast.prewhere_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "PREWHERE " << (hilite ? hilite_none : ""); - formatAST(*ast.prewhere_expression, s, indent, hilite, one_line); - } - - if (ast.where_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "WHERE " << (hilite ? hilite_none : ""); - formatAST(*ast.where_expression, s, indent, hilite, one_line); - } - - if (ast.group_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "GROUP BY " << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.group_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.group_expression_list), s, indent, hilite); - } - - if (ast.group_by_with_totals) - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << (one_line ? "" : " ") << "WITH TOTALS" << (hilite ? hilite_none : ""); - - if (ast.having_expression) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "HAVING " << (hilite ? hilite_none : ""); - formatAST(*ast.having_expression, s, indent, hilite, one_line); - } - - if (ast.order_expression_list) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ORDER BY " << (hilite ? hilite_none : ""); - one_line - ? formatAST(*ast.order_expression_list, s, indent, hilite, one_line) - : formatExpressionListMultiline(typeid_cast(*ast.order_expression_list), s, indent, hilite); - } - - if (ast.limit_length) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "LIMIT " << (hilite ? hilite_none : ""); - if (ast.limit_offset) - { - formatAST(*ast.limit_offset, s, indent, hilite, one_line); - s << ", "; - } - formatAST(*ast.limit_length, s, indent, hilite, one_line); - } - - if (ast.settings) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "SETTINGS " << (hilite ? hilite_none : ""); - - const ASTSetQuery & ast_set = typeid_cast(*ast.settings); - for (ASTSetQuery::Changes::const_iterator it = ast_set.changes.begin(); it != ast_set.changes.end(); ++it) - { - if (it != ast_set.changes.begin()) - s << ", "; - - s << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); - } - } - - if (ast.format) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } - - if (ast.next_union_all) - { - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "UNION ALL " << nl_or_ws << (hilite ? hilite_none : ""); - - // NOTE Мы можем безопасно применить static_cast вместо typeid_cast, потому что знаем, что в цепочке UNION ALL - // имеются только деревья типа SELECT. - const ASTSelectQuery & next_ast = static_cast(*ast.next_union_all); - - formatAST(next_ast, s, indent, hilite, one_line, need_parens); - } -} - -void formatAST(const ASTSubquery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (need_parens && !ast.alias.empty()) - s << '('; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_nothing = one_line ? "" : "\n"; - - s << nl_or_nothing << indent_str << "(" << nl_or_nothing; - formatAST(*ast.children[0], s, indent + 1, hilite, one_line); - s << nl_or_nothing << indent_str << ")"; - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_ws = one_line ? " " : "\n"; - - if (!ast.database.empty() && ast.table.empty()) - { - s << (hilite ? hilite_keyword : "") << (ast.attach ? "ATTACH DATABASE " : "CREATE DATABASE ") << (ast.if_not_exists ? "IF NOT EXISTS " : "") << (hilite ? hilite_none : "") - << backQuoteIfNeed(ast.database); - return; - } - - { - std::string what = "TABLE"; - if (ast.is_view) - what = "VIEW"; - if (ast.is_materialized_view) - what = "MATERIALIZED VIEW"; - - s << (hilite ? hilite_keyword : "") << (ast.attach ? "ATTACH " : "CREATE ") << (ast.is_temporary ? "TEMPORARY " : "") << what << " " << (ast.if_not_exists ? "IF NOT EXISTS " : "") << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - } - - if (!ast.as_table.empty()) - { - s << (hilite ? hilite_keyword : "") << " AS " << (hilite ? hilite_none : "") - << (!ast.as_database.empty() ? backQuoteIfNeed(ast.as_database) + "." : "") << backQuoteIfNeed(ast.as_table); - } - - if (ast.columns) - { - s << (one_line ? " (" : "\n("); - formatAST(*ast.columns, s, indent + 1, hilite, one_line); - s << (one_line ? ")" : "\n)"); - } - - if (ast.storage && !ast.is_materialized_view && !ast.is_view) - { - s << (hilite ? hilite_keyword : "") << " ENGINE" << (hilite ? hilite_none : "") << " = "; - formatAST(*ast.storage, s, indent, hilite, one_line); - } - - if (ast.inner_storage) - { - s << (hilite ? hilite_keyword : "") << " ENGINE" << (hilite ? hilite_none : "") << " = "; - formatAST(*ast.inner_storage, s, indent, hilite, one_line); - } - - if (ast.is_populate) - { - s << (hilite ? hilite_keyword : "") << " POPULATE" << (hilite ? hilite_none : ""); - } - - if (ast.select) - { - s << (hilite ? hilite_keyword : "") << " AS" << nl_or_ws << (hilite ? hilite_none : ""); - formatAST(*ast.select, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTDropQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (ast.table.empty() && !ast.database.empty()) - { - s << (hilite ? hilite_keyword : "") << (ast.detach ? "DETACH DATABASE " : "DROP DATABASE ") << (ast.if_exists ? "IF EXISTS " : "") << (hilite ? hilite_none : "") << backQuoteIfNeed(ast.database); - return; - } - - s << (hilite ? hilite_keyword : "") << (ast.detach ? "DETACH TABLE " : "DROP TABLE ") << (ast.if_exists ? "IF EXISTS " : "") << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); -} - -void formatAST(const ASTOptimizeQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "OPTIMIZE TABLE " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); -} - -void formatAST(const ASTQueryWithTableAndOutput & ast, std::string name, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << name << " " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - - if (ast.format) - { - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTExistsQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "EXISTS TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTDescribeQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "DESCRIBE TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTShowCreateQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(static_cast(ast), "SHOW CREATE TABLE", s, indent, hilite, one_line, false); -} - -void formatAST(const ASTRenameQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "RENAME TABLE " << (hilite ? hilite_none : ""); - - for (ASTRenameQuery::Elements::const_iterator it = ast.elements.begin(); it != ast.elements.end(); ++it) - { - if (it != ast.elements.begin()) - s << ", "; - - s << (!it->from.database.empty() ? backQuoteIfNeed(it->from.database) + "." : "") << backQuoteIfNeed(it->from.table) - << (hilite ? hilite_keyword : "") << " TO " << (hilite ? hilite_none : "") - << (!it->to.database.empty() ? backQuoteIfNeed(it->to.database) + "." : "") << backQuoteIfNeed(it->to.table); - } -} - -void formatAST(const ASTSetQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "SET " << (ast.global ? "GLOBAL " : "") << (hilite ? hilite_none : ""); - - for (ASTSetQuery::Changes::const_iterator it = ast.changes.begin(); it != ast.changes.end(); ++it) - { - if (it != ast.changes.begin()) - s << ", "; - - s << it->name << " = " << apply_visitor(FieldVisitorToString(), it->value); - } -} - -void formatAST(const ASTShowTablesQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (ast.databases) - { - s << (hilite ? hilite_keyword : "") << "SHOW DATABASES" << (hilite ? hilite_none : ""); - } - else - { - s << (hilite ? hilite_keyword : "") << "SHOW TABLES" << (hilite ? hilite_none : ""); - - if (!ast.from.empty()) - s << (hilite ? hilite_keyword : "") << " FROM " << (hilite ? hilite_none : "") - << backQuoteIfNeed(ast.from); - - if (!ast.like.empty()) - s << (hilite ? hilite_keyword : "") << " LIKE " << (hilite ? hilite_none : "") - << mysqlxx::quote << ast.like; - } - - if (ast.format) - { - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "FORMAT " << (hilite ? hilite_none : ""); - formatAST(*ast.format, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTUseQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "USE " << (hilite ? hilite_none : "") << backQuoteIfNeed(ast.database); - return; -} - -void formatAST(const ASTShowProcesslistQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "SHOW PROCESSLIST" << (hilite ? hilite_none : ""); - return; -} - -void formatAST(const ASTInsertQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "INSERT INTO " << (hilite ? hilite_none : "") - << (!ast.database.empty() ? backQuoteIfNeed(ast.database) + "." : "") << backQuoteIfNeed(ast.table); - - if (!ast.insert_id.empty()) - s << (hilite ? hilite_keyword : "") << " ID = " << (hilite ? hilite_none : "") - << mysqlxx::quote << ast.insert_id; - - if (ast.columns) - { - s << " ("; - formatAST(*ast.columns, s, indent, hilite, one_line); - s << ")"; - } - - if (ast.select) - { - s << " "; - formatAST(*ast.select, s, indent, hilite, one_line); - } - else - { - if (!ast.format.empty()) - { - s << (hilite ? hilite_keyword : "") << " FORMAT " << (hilite ? hilite_none : "") << ast.format; - } - else - { - s << (hilite ? hilite_keyword : "") << " VALUES" << (hilite ? hilite_none : ""); - } - } -} - -void formatAST(const ASTFunction & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (need_parens && !ast.alias.empty()) - s << '('; - - /// Стоит ли записать эту функцию в виде оператора? - bool written = false; - if (ast.arguments && !ast.parameters) - { - if (ast.arguments->children.size() == 1) - { - const char * operators[] = - { - "negate", "-", - "not", "NOT ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - - /** Особо дурацкий случай. Если у нас унарный минус перед литералом, являющимся отрицательным числом: - * "-(-1)" или "- -1", то это нельзя форматировать как --1, так как это будет воспринято как комментарий. - * Вместо этого, добавим пробел. - * PS. Нельзя просто попросить добавить скобки - см. formatAST для ASTLiteral. - */ - if (ast.name == "negate" && typeid_cast(&*ast.arguments->children[0])) - s << ' '; - - formatAST(*ast.arguments, s, indent, hilite, one_line, true); - written = true; - } - } - } - - /** need_parens - нужны ли скобки вокруг выражения с оператором. - * Они нужны, только если это выражение входит в другое выражение с оператором. - */ - - if (!written && ast.arguments->children.size() == 2) - { - const char * operators[] = - { - "multiply", " * ", - "divide", " / ", - "modulo", " % ", - "plus", " + ", - "minus", " - ", - "notEquals", " != ", - "lessOrEquals", " <= ", - "greaterOrEquals", " >= ", - "less", " < ", - "greater", " > ", - "equals", " = ", - "like", " LIKE ", - "notLike", " NOT LIKE ", - "in", " IN ", - "notIn", " NOT IN ", - "globalIn", " GLOBAL IN ", - "globalNotIn", " GLOBAL NOT IN ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - if (need_parens) - s << '('; - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - if (need_parens) - s << ')'; - written = true; - } - } - - if (!written && 0 == strcmp(ast.name.c_str(), "arrayElement")) - { - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << '[' << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << ']' << (hilite ? hilite_none : ""); - written = true; - } - - if (!written && 0 == strcmp(ast.name.c_str(), "tupleElement")) - { - formatAST(*ast.arguments->children[0], s, indent, hilite, one_line, true); - s << (hilite ? hilite_operator : "") << "." << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[1], s, indent, hilite, one_line, true); - written = true; - } - } - - if (!written && ast.arguments->children.size() >= 2) - { - const char * operators[] = - { - "and", " AND ", - "or", " OR ", - nullptr - }; - - for (const char ** func = operators; *func; func += 2) - { - if (0 == strcmp(ast.name.c_str(), func[0])) - { - if (need_parens) - s << '('; - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << (hilite ? hilite_operator : "") << func[1] << (hilite ? hilite_none : ""); - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, true); - } - if (need_parens) - s << ')'; - written = true; - } - } - } - - if (!written && ast.arguments->children.size() >= 1 && 0 == strcmp(ast.name.c_str(), "array")) - { - s << (hilite ? hilite_operator : "") << '[' << (hilite ? hilite_none : ""); - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << ", "; - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, false); - } - s << (hilite ? hilite_operator : "") << ']' << (hilite ? hilite_none : ""); - written = true; - } - - if (!written && ast.arguments->children.size() >= 2 && 0 == strcmp(ast.name.c_str(), "tuple")) - { - s << (hilite ? hilite_operator : "") << '(' << (hilite ? hilite_none : ""); - for (size_t i = 0; i < ast.arguments->children.size(); ++i) - { - if (i != 0) - s << ", "; - formatAST(*ast.arguments->children[i], s, indent, hilite, one_line, false); - } - s << (hilite ? hilite_operator : "") << ')' << (hilite ? hilite_none : ""); - written = true; - } - } - - if (!written) - { - s << (hilite ? hilite_function : "") << ast.name; - - if (ast.parameters) - { - s << '(' << (hilite ? hilite_none : ""); - formatAST(*ast.parameters, s, indent, hilite, one_line); - s << (hilite ? hilite_function : "") << ')'; - } - - if (ast.arguments) - { - s << '(' << (hilite ? hilite_none : ""); - formatAST(*ast.arguments, s, indent, hilite, one_line); - s << (hilite ? hilite_function : "") << ')'; - } - - s << (hilite ? hilite_none : ""); - } - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTIdentifier & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (need_parens && !ast.alias.empty()) - s << '('; - - s << (hilite ? hilite_identifier : ""); - - WriteBufferFromOStream wb(s, 32); - writeProbablyBackQuotedString(ast.name, wb); - wb.next(); - - s << (hilite ? hilite_none : ""); - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTLiteral & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - if (need_parens && !ast.alias.empty()) - s << '('; - - s << apply_visitor(FieldVisitorToString(), ast.value); - - if (!ast.alias.empty()) - { - writeAlias(ast.alias, s, hilite, one_line); - if (need_parens) - s << ')'; - } -} - -void formatAST(const ASTNameTypePair & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << nl_or_ws << indent_str << backQuoteIfNeed(ast.name) << " "; - formatAST(*ast.type, s, indent, hilite, one_line); -} - -void formatAST(const ASTColumnDeclaration & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << nl_or_ws << indent_str << backQuoteIfNeed(ast.name); - if (ast.type) - { - s << ' '; - formatAST(*ast.type, s, indent, hilite, one_line); - } - - if (ast.default_expression) - { - s << ' ' << highlight(ast.default_specifier, hilite_keyword, hilite) << ' '; - formatAST(*ast.default_expression, s, indent, hilite, one_line); - } -} - -void formatAST(const ASTAsterisk & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << "*"; -} - -void formatAST(const ASTOrderByElement & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - formatAST(*ast.children.front(), s, indent, hilite, one_line); - s << (hilite ? hilite_keyword : "") << (ast.direction == -1 ? " DESC" : " ASC") << (hilite ? hilite_none : ""); - if (!ast.collator.isNull()) - { - s << (hilite ? hilite_keyword : "") << " COLLATE " << (hilite ? hilite_none : "") - << "'" << ast.collator->getLocale() << "'"; - } -} - -void formatAST(const ASTAlterQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "ALTER TABLE " << (hilite ? hilite_none : ""); - - if (!ast.table.empty()) - { - if (!ast.database.empty()) - { - s << indent_str << ast.database; - s << "."; - } - s << indent_str << ast.table; - } - s << nl_or_ws; - - for (size_t i = 0; i < ast.parameters.size(); ++i) - { - const ASTAlterQuery::Parameters &p = ast.parameters[i]; - - if (p.type == ASTAlterQuery::ADD_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "ADD COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.col_decl, s, indent, hilite, true); - - /// AFTER - if (p.column) - { - s << (hilite ? hilite_keyword : "") << indent_str << " AFTER " << (hilite ? hilite_none : ""); - formatAST(*p.column, s, indent, hilite, one_line); - } - } - else if (p.type == ASTAlterQuery::DROP_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "DROP COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.column, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::MODIFY_COLUMN) - { - s << (hilite ? hilite_keyword : "") << indent_str << "MODIFY COLUMN " << (hilite ? hilite_none : ""); - formatAST(*p.col_decl, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::DROP_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << (p.detach ? "DETACH" : "DROP") << " PARTITION " - << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::ATTACH_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "ATTACH " << (p.unreplicated ? "UNREPLICATED " : "") - << (p.part ? "PART " : "PARTITION ") << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else if (p.type == ASTAlterQuery::FETCH_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "FETCH " << (p.unreplicated ? "UNREPLICATED " : "") - << "PARTITION " << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - s << (hilite ? hilite_keyword : "") << " FROM " << (hilite ? hilite_none : "") - << mysqlxx::quote << p.from; - } - else if (p.type == ASTAlterQuery::FREEZE_PARTITION) - { - s << (hilite ? hilite_keyword : "") << indent_str << "FREEZE PARTITION " << (hilite ? hilite_none : ""); - formatAST(*p.partition, s, indent, hilite, true); - } - else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - - std::string comma = (i < (ast.parameters.size() -1) ) ? "," : ""; - s << (hilite ? hilite_keyword : "") << indent_str << comma << (hilite ? hilite_none : ""); - - s << nl_or_ws; - } -} - -void formatAST(const ASTSet & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - /** Подготовленное множество. В пользовательских запросах такого не бывает, но такое бывает после промежуточных преобразований запроса. - * Выведем его не по-настоящему (это не будет корректным запросом, но покажет, что здесь было множество). - */ - s << (hilite ? hilite_keyword : "") - << "(...)" - << (hilite ? hilite_none : ""); -} - -void formatAST(const ASTJoin & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : ""); - - if (ast.locality == ASTJoin::Global) - s << "GLOBAL "; - - if (ast.kind != ASTJoin::Cross) - s << (ast.strictness == ASTJoin::Any ? "ANY " : "ALL "); - - s << (ast.kind == ASTJoin::Inner ? "INNER " - : (ast.kind == ASTJoin::Left ? "LEFT " - : (ast.kind == ASTJoin::Right ? "RIGHT " - : (ast.kind == ASTJoin::Cross ? "CROSS " - : "FULL OUTER ")))); - - s << "JOIN " - << (hilite ? hilite_none : ""); - - formatAST(*ast.table, s, indent, hilite, one_line, need_parens); - - if (ast.kind != ASTJoin::Cross) - { - s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : ""); - formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens); - } -} - -void formatAST(const ASTCheckQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - std::string nl_or_nothing = one_line ? "" : "\n"; - - std::string indent_str = one_line ? "" : std::string(4 * indent, ' '); - std::string nl_or_ws = one_line ? " " : "\n"; - - s << (hilite ? hilite_keyword : "") << indent_str << "CHECK TABLE " << (hilite ? hilite_none : ""); - - if (!ast.table.empty()) - { - if (!ast.database.empty()) - { - s << (hilite ? hilite_keyword : "") << indent_str << ast.database << (hilite ? hilite_none : ""); - s << "."; - } - s << (hilite ? hilite_keyword : "") << indent_str << ast.table << (hilite ? hilite_none : ""); - } - s << nl_or_ws; -} - -/* -void formatAST(const ASTMultiQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - s << (hilite ? hilite_keyword : "") << "{" << (hilite ? hilite_none : ""); - - for (const auto & child : ast.children) - { - s << "\n"; - formatAST(*child, s, indent + 1, hilite, one_line, need_parens); - s << ";\n"; - } - - s << (hilite ? hilite_keyword : "") << "}" << (hilite ? hilite_none : ""); -}*/ - - -void formatAST(const IAST & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens) -{ - FormatState state = { .s = s, .hilite = hilite, .one_line = one_line }; - state.formatImpl(ast, indent, need_parens); -} - - -void FormatState::formatImpl(const IAST & ast, size_t indent, bool need_parens) -{ -#define DISPATCH(NAME) \ - else if (const AST ## NAME * concrete = typeid_cast(&ast)) \ - state.formatImpl(*concrete, indent, need_parens); - - if (false) {} - DISPATCH(SelectQuery) - DISPATCH(InsertQuery) - DISPATCH(CreateQuery) - DISPATCH(DropQuery) - DISPATCH(RenameQuery) - DISPATCH(ShowTablesQuery) - DISPATCH(UseQuery) - DISPATCH(SetQuery) - DISPATCH(OptimizeQuery) - DISPATCH(ExistsQuery) - DISPATCH(ShowCreateQuery) - DISPATCH(DescribeQuery) - DISPATCH(ExpressionList) - DISPATCH(Function) - DISPATCH(Identifier) - DISPATCH(Literal) - DISPATCH(NameTypePair) - DISPATCH(ColumnDeclaration) - DISPATCH(Asterisk) - DISPATCH(OrderByElement) - DISPATCH(Subquery) - DISPATCH(AlterQuery) - DISPATCH(ShowProcesslistQuery) - DISPATCH(Set) - DISPATCH(Join) - DISPATCH(CheckQuery) -// DISPATCH(MultiQuery) - else - throw Exception("Unknown element in AST: " + ast.getID() - + ((ast.range.first && (ast.range.second > ast.range.first)) - ? " '" + std::string(ast.range.first, ast.range.second - ast.range.first) + "'" - : ""), - ErrorCodes::UNKNOWN_ELEMENT_IN_AST); - -#undef DISPATCH -} - - String formatColumnsForCreateQuery(NamesAndTypesList & columns) { std::string res; From 5ed70eeb275e137053269f8d7c0712e376cd553f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Aug 2015 07:28:59 +0300 Subject: [PATCH 24/43] dbms: preparation [#METR-17606]. --- dbms/include/DB/Parsers/ASTFunction.h | 2 +- dbms/include/DB/Parsers/ASTIdentifier.h | 12 +--------- dbms/include/DB/Parsers/ASTLiteral.h | 14 ++---------- dbms/include/DB/Parsers/ASTSubquery.h | 13 +---------- dbms/include/DB/Parsers/ASTWithAlias.h | 30 +++++++++++++++++++++++++ dbms/include/DB/Parsers/IAST.h | 6 ++--- dbms/src/Parsers/ASTFunction.cpp | 13 +---------- 7 files changed, 39 insertions(+), 51 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index 3801a32e638..c9857b20c83 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -84,7 +84,7 @@ public: } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index 550973f298f..adeb068f61f 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -44,11 +44,8 @@ public: } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - settings.ostr << (settings.hilite ? hilite_identifier : ""); WriteBufferFromOStream wb(settings.ostr, 32); @@ -56,13 +53,6 @@ protected: wb.next(); settings.ostr << (settings.hilite ? hilite_none : ""); - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } }; diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index c5f1ec91ed4..6ee40f78afd 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -28,19 +28,9 @@ public: ASTPtr clone() const override { return new ASTLiteral(*this); } protected: - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { - if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; - - settings.ostr <formatImpl(settings, state, frame_dont_need_parens); settings.ostr << nl_or_nothing << indent_str << ")"; - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } }; diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 63eaa186cd3..29478d6a381 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -1,5 +1,6 @@ #pragma once +#include #include @@ -19,6 +20,35 @@ public: String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; } String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } + + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final + { + if (!alias.empty()) + { + /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. + if (!state.printed_asts_with_alias.insert(this).second) + { + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(alias, wb); + return; + } + } + + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + formatImplWithAlias(settings, state, frame); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr <<')'; + } + } + + virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; /// helper for setting aliases and chaining result to other functions diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 4ee619d0c9f..fb1b2aee2af 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -14,8 +16,6 @@ #include #include -#include - namespace DB { @@ -155,7 +155,7 @@ public: /// Состояние. Например, множество узлов DAG, которых мы уже обошли. struct FormatState { - /// TODO + std::unordered_set printed_asts_with_alias; }; /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 1791d837970..3c2591d3376 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -5,12 +5,8 @@ namespace DB { -void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTFunction::formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr << '('; - FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; nested_need_parens.need_parens = true; @@ -187,13 +183,6 @@ void ASTFunction::formatImpl(const FormatSettings & settings, FormatState & stat settings.ostr << (settings.hilite ? hilite_none : ""); } - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr << ')'; - } } } From 32da2a7d8c1edf94ab4b41bba8af3a717e2fbf5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 00:32:51 +0300 Subject: [PATCH 25/43] dbms: improved query formatting for distributed queries [#METR-17606]. --- dbms/include/DB/Parsers/ASTWithAlias.h | 28 ++-------------- dbms/include/DB/Parsers/IAST.h | 29 ++++++++-------- dbms/src/Parsers/ASTSelectQuery.cpp | 1 + dbms/src/Parsers/ASTWithAlias.cpp | 33 +++++++++++++++++++ .../00211_query_formatting_aliases.reference | 1 + .../00211_query_formatting_aliases.sql | 6 ++++ 6 files changed, 59 insertions(+), 39 deletions(-) create mode 100644 dbms/src/Parsers/ASTWithAlias.cpp create mode 100644 dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference create mode 100644 dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 29478d6a381..0ade6e26ce9 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -21,32 +21,8 @@ public: String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } - void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final - { - if (!alias.empty()) - { - /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. - if (!state.printed_asts_with_alias.insert(this).second) - { - WriteBufferFromOStream wb(settings.ostr, 32); - writeProbablyBackQuotedString(alias, wb); - return; - } - } - - /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. - if (frame.need_parens && !alias.empty()) - settings.ostr <<'('; - - formatImplWithAlias(settings, state, frame); - - if (!alias.empty()) - { - writeAlias(alias, settings.ostr, settings.hilite); - if (frame.need_parens) - settings.ostr <<')'; - } - } + /// Вызывает formatImplWithAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. + void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final; virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index fb1b2aee2af..2f3bc41db60 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include @@ -152,10 +152,13 @@ public: } }; - /// Состояние. Например, множество узлов DAG, которых мы уже обошли. + /// Состояние. Например, может запоминаться множество узлов, которых мы уже обошли. struct FormatState { - std::unordered_set printed_asts_with_alias; + /** Запрос SELECT, в котором найден алиас; идентификатор узла с таким алиасом. + * Нужно, чтобы когда узел встретился повторно, выводить только алиас. + */ + std::set> printed_asts_with_alias; }; /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. @@ -163,6 +166,7 @@ public: { bool indent = 0; bool need_parens = false; + const IAST * current_select = nullptr; }; void format(const FormatSettings & settings) const @@ -171,16 +175,6 @@ public: formatImpl(settings, state, FormatStateStacked()); } - - /// Для подсветки синтаксиса. - static const char * hilite_keyword; - static const char * hilite_identifier; - static const char * hilite_function; - static const char * hilite_operator; - static const char * hilite_alias; - static const char * hilite_none; - - virtual void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { throw Exception("Unknown element in AST: " + getID() @@ -192,6 +186,15 @@ public: void writeAlias(const String & name, std::ostream & s, bool hilite) const; +protected: + /// Для подсветки синтаксиса. + static const char * hilite_keyword; + static const char * hilite_identifier; + static const char * hilite_function; + static const char * hilite_operator; + static const char * hilite_alias; + static const char * hilite_none; + private: size_t checkDepthImpl(size_t max_depth, size_t level) const { diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 76064357b2f..35c4214e75c 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -222,6 +222,7 @@ const IAST * ASTSelectQuery::getFormat() const void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { + frame.current_select = this; frame.need_parens = false; std::string indent_str = s.one_line ? "" : std::string(4 * frame.indent, ' '); diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp new file mode 100644 index 00000000000..e1319fcafea --- /dev/null +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -0,0 +1,33 @@ +#include + +namespace DB +{ + +void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +{ + if (!alias.empty()) + { + /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. + if (!state.printed_asts_with_alias.emplace(frame.current_select, getID()).second) + { + WriteBufferFromOStream wb(settings.ostr, 32); + writeProbablyBackQuotedString(alias, wb); + return; + } + } + + /// Если есть алиас, то требуются скобки вокруг всего выражения, включая алиас. Потому что запись вида 0 AS x + 0 синтаксически некорректна. + if (frame.need_parens && !alias.empty()) + settings.ostr <<'('; + + formatImplWithAlias(settings, state, frame); + + if (!alias.empty()) + { + writeAlias(alias, settings.ostr, settings.hilite); + if (frame.need_parens) + settings.ostr <<')'; + } +} + +} diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference new file mode 100644 index 00000000000..dd143e07d02 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference @@ -0,0 +1 @@ +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql new file mode 100644 index 00000000000..4628b6ea26a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql @@ -0,0 +1,6 @@ +SELECT toUInt64(1) IN (1234567890, 2345678901, 3456789012, 4567890123, 5678901234, 6789012345, 7890123456, 8901234567, 9012345678, 123456789) AS x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, + x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x +FROM remote('127.0.0.1', system, one) SETTINGS max_query_size = 10000; From ec1b05bf540babae4836ec0c407365c6e73d11d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 00:38:52 +0300 Subject: [PATCH 26/43] dbms: added test [#METR-17606]. --- .../0_stateless/00211_query_formatting_aliases.reference | 2 ++ .../queries/0_stateless/00211_query_formatting_aliases.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference index dd143e07d02..b1cd860dcc6 100644 --- a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.reference @@ -1 +1,3 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +1 1 (2,2) +1 1 (2,2) diff --git a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql index 4628b6ea26a..e1006d89d82 100644 --- a/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql +++ b/dbms/tests/queries/0_stateless/00211_query_formatting_aliases.sql @@ -4,3 +4,5 @@ SELECT toUInt64(1) IN (1234567890, 2345678901, 3456789012, 4567890123, 567890123 x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x FROM remote('127.0.0.1', system, one) SETTINGS max_query_size = 10000; + +SELECT 1 AS x, x, (SELECT 2 AS x, x) FROM remote('127.0.0.{1,2}', system.one) WHERE (3, 4) IN (SELECT 3 AS x, toUInt8(x + 1)); From ca3a36c3e16916059aed1135e2695082668cb092 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Aug 2015 02:46:15 +0300 Subject: [PATCH 27/43] dbms: addition to prev. revision [#METR-17606]. --- dbms/include/DB/Parsers/ASTFunction.h | 2 +- dbms/include/DB/Parsers/ASTIdentifier.h | 2 +- dbms/include/DB/Parsers/ASTLiteral.h | 2 +- dbms/include/DB/Parsers/ASTSubquery.h | 2 +- dbms/include/DB/Parsers/ASTWithAlias.h | 8 +++++--- dbms/src/Parsers/ASTFunction.cpp | 2 +- dbms/src/Parsers/ASTWithAlias.cpp | 4 ++-- 7 files changed, 12 insertions(+), 10 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTFunction.h b/dbms/include/DB/Parsers/ASTFunction.h index c9857b20c83..74b72b25f8b 100644 --- a/dbms/include/DB/Parsers/ASTFunction.h +++ b/dbms/include/DB/Parsers/ASTFunction.h @@ -84,7 +84,7 @@ public: } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; }; diff --git a/dbms/include/DB/Parsers/ASTIdentifier.h b/dbms/include/DB/Parsers/ASTIdentifier.h index adeb068f61f..9056e7dec58 100644 --- a/dbms/include/DB/Parsers/ASTIdentifier.h +++ b/dbms/include/DB/Parsers/ASTIdentifier.h @@ -44,7 +44,7 @@ public: } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << (settings.hilite ? hilite_identifier : ""); diff --git a/dbms/include/DB/Parsers/ASTLiteral.h b/dbms/include/DB/Parsers/ASTLiteral.h index 6ee40f78afd..17c5d87fd4e 100644 --- a/dbms/include/DB/Parsers/ASTLiteral.h +++ b/dbms/include/DB/Parsers/ASTLiteral.h @@ -28,7 +28,7 @@ public: ASTPtr clone() const override { return new ASTLiteral(*this); } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { settings.ostr << apply_visitor(FieldVisitorToString(), value); } diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 9b044ddf0ec..7447a47350d 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -36,7 +36,7 @@ public: String getColumnName() const override { return getTreeID(); } protected: - void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override + void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); std::string nl_or_nothing = settings.one_line ? "" : "\n"; diff --git a/dbms/include/DB/Parsers/ASTWithAlias.h b/dbms/include/DB/Parsers/ASTWithAlias.h index 0ade6e26ce9..53a888baf32 100644 --- a/dbms/include/DB/Parsers/ASTWithAlias.h +++ b/dbms/include/DB/Parsers/ASTWithAlias.h @@ -7,6 +7,7 @@ namespace DB { + /** Базовый класс для AST, которые могут содержать алиас (идентификаторы, литералы, функции). */ class ASTWithAlias : public IAST @@ -21,14 +22,15 @@ public: String tryGetAlias() const override { return alias; } void setAlias(const String & to) override { alias = to; } - /// Вызывает formatImplWithAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. + /// Вызывает formatImplWithoutAlias, а также выводит алиас. Если надо - заключает всё выражение в скобки. void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override final; - virtual void formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; + virtual void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const = 0; }; /// helper for setting aliases and chaining result to other functions -inline ASTPtr setAlias(ASTPtr ast, const String & alias) { +inline ASTPtr setAlias(ASTPtr ast, const String & alias) +{ ast->setAlias(alias); return ast; }; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index 3c2591d3376..f014f13a6ef 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -5,7 +5,7 @@ namespace DB { -void ASTFunction::formatImplWithAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const +void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { FormatStateStacked nested_need_parens = frame; FormatStateStacked nested_dont_need_parens = frame; diff --git a/dbms/src/Parsers/ASTWithAlias.cpp b/dbms/src/Parsers/ASTWithAlias.cpp index e1319fcafea..97016f7eb17 100644 --- a/dbms/src/Parsers/ASTWithAlias.cpp +++ b/dbms/src/Parsers/ASTWithAlias.cpp @@ -8,7 +8,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (!alias.empty()) { /// Если мы уже ранее вывели этот узел в другом месте запроса, то теперь достаточно вывести лишь алиас. - if (!state.printed_asts_with_alias.emplace(frame.current_select, getID()).second) + if (!state.printed_asts_with_alias.emplace(frame.current_select, alias).second) { WriteBufferFromOStream wb(settings.ostr, 32); writeProbablyBackQuotedString(alias, wb); @@ -20,7 +20,7 @@ void ASTWithAlias::formatImpl(const FormatSettings & settings, FormatState & sta if (frame.need_parens && !alias.empty()) settings.ostr <<'('; - formatImplWithAlias(settings, state, frame); + formatImplWithoutAlias(settings, state, frame); if (!alias.empty()) { From e24cad5123d41dd4c30f24356b6ec2e01db673d3 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 15:39:06 +0300 Subject: [PATCH 28/43] dbms: Server: Performance improvements. [#METR-17276] --- dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 08f28a14be8..8aa6edf22fb 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -99,7 +99,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -109,7 +109,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator >, 16, 16, 19, TrivialHash>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } From 311a41e14f3ff189f90497de939e1bde4e10d14a Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 18:24:23 +0300 Subject: [PATCH 29/43] Merge --- .../AggregateFunctionUniq.h | 244 ++++++---- .../DB/Common/CombinedCardinalityEstimator.h | 254 ++++++----- dbms/include/DB/Common/HashTable/HashTable.h | 50 ++- dbms/include/DB/Common/HashTable/SmallTable.h | 50 +++ .../HyperLogLogWithSmallSetOptimization.h | 18 +- dbms/include/DB/Core/ErrorCodes.h | 1 + .../00211_aggregate_function_uniq.reference | 416 ++++++++++++++++++ .../00211_aggregate_function_uniq.sql | 35 ++ 8 files changed, 874 insertions(+), 194 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference create mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 146bb6a9394..8aa6edf22fb 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -25,32 +25,7 @@ namespace DB { - -template struct AggregateFunctionUniqTraits -{ - static UInt64 hash(T x) { return x; } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float32 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - -template <> struct AggregateFunctionUniqTraits -{ - static UInt64 hash(Float64 x) - { - UInt64 res = 0; - memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); - return res; - } -}; - +/// uniq struct AggregateFunctionUniqUniquesHashSetData { @@ -60,6 +35,7 @@ struct AggregateFunctionUniqUniquesHashSetData static String getName() { return "uniq"; } }; +/// uniqHLL12 template struct AggregateFunctionUniqHLL12Data @@ -79,6 +55,7 @@ struct AggregateFunctionUniqHLL12Data static String getName() { return "uniqHLL12"; } }; +/// uniqExact template struct AggregateFunctionUniqExactData @@ -122,7 +99,7 @@ template struct AggregateFunctionUniqCombinedData { using Key = T; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -132,7 +109,7 @@ template <> struct AggregateFunctionUniqCombinedData { using Key = UInt64; - using Set = CombinedCardinalityEstimator, HashTableGrower<4> >, 16, 16, 19>; + using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; static String getName() { return "uniqCombined"; } @@ -140,75 +117,172 @@ struct AggregateFunctionUniqCombinedData namespace detail { - /** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. - * Используется для частичной специализации для добавления строк. - */ - template - struct OneAdder + +/** Хэширование 64-битных целочисленных значений в 32-битные. + * Источник: https://gist.github.com/badboy/6267743 + */ +template +struct Hash64To32; + +template +struct Hash64To32::value || std::is_same::value>::type> +{ + static UInt32 compute(T key) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + using U = typename std::make_unsigned::type; + auto x = static_cast(key); - template - struct OneAdder + x = (~x) + (x << 18); + x = x ^ (x >> 31); + x = x * 21; + x = x ^ (x >> 11); + x = x + (x << 6); + x = x ^ (x >> 22); + return static_cast(x); + } +}; + +/** Хэш-функция для uniqCombined. + */ +template +struct CombinedCardinalityTraits +{ + static UInt32 hash(T key) { - static void addOne(Data & data, const IColumn & column, size_t row_num) - { - /// Имейте ввиду, что вычисление приближённое. - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + return key; + } +}; - template - struct OneAdder > +template +struct CombinedCardinalityTraits::value || std::is_same::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - data.set.insert(static_cast &>(column).getData()[row_num]); - } + return Op::compute(key); }; +}; - template<> - struct OneAdder > +template +struct CombinedCardinalityTraits::value>::type> +{ + using Op = Hash64To32; + + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return Op::compute(res); + } +}; - UInt128 key; - SipHash hash; - hash.update(value.data, value.size); - hash.get128(key.first, key.second); - - data.set.insert(key); - } - }; - - template - struct OneAdder > +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - if (data.set.isMedium()) - data.set.insert(static_cast &>(column).getData()[row_num]); - else - data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); - } - }; + UInt32 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); + return res; + } +}; - template<> - struct OneAdder > +/** Хэш-функция для uniq. + */ +template struct AggregateFunctionUniqTraits +{ + static UInt64 hash(T x) { return x; } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float32 x) { - static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) - { - StringRef value = column.getDataAt(row_num); - data.set.insert(CityHash64(value.data, value.size)); - } - }; + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; + +template <> struct AggregateFunctionUniqTraits +{ + static UInt64 hash(Float64 x) + { + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } +}; + +/** Структура для делегации работы по добавлению одного элемента в агрегатные функции uniq. + * Используется для частичной специализации для добавления строк. + */ +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) + { + data.set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); + } +}; + +template +struct OneAdder +{ + static void addOne(Data & data, const IColumn & column, size_t row_num) + { + /// Имейте ввиду, что вычисление приближённое. + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) + { + data.set.insert(static_cast &>(column).getData()[row_num]); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqExactData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + + UInt128 key; + SipHash hash; + hash.update(value.data, value.size); + hash.get128(key.first, key.second); + + data.set.insert(key); + } +}; + +template +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + const auto & value = static_cast &>(column).getData()[row_num]; + data.set.insert(CombinedCardinalityTraits::hash(value)); + } +}; + +template<> +struct OneAdder > +{ + static void addOne(AggregateFunctionUniqCombinedData & data, const IColumn & column, size_t row_num) + { + StringRef value = column.getDataAt(row_num); + data.set.insert(CityHash64(value.data, value.size)); + } +}; + } diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 43c11380668..00a01232b31 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -2,7 +2,8 @@ #include #include -#include +#include +#include namespace DB @@ -11,11 +12,11 @@ namespace DB namespace details { -enum class ContainerType { SMALL, MEDIUM, LARGE }; +enum class ContainerType : UInt8 { SMALL = 1, MEDIUM = 2, LARGE = 3 }; -ContainerType max(const ContainerType & lhs, const ContainerType & rhs) +static inline ContainerType max(const ContainerType & lhs, const ContainerType & rhs) { - unsigned int res = std::max(static_cast(lhs), static_cast(rhs)); + UInt8 res = std::max(static_cast(lhs), static_cast(rhs)); return static_cast(res); } @@ -25,38 +26,41 @@ ContainerType max(const ContainerType & lhs, const ContainerType & rhs) * Для среднего - выделяется HashSet. * Для большого - выделяется HyperLogLog. */ -template +template +< + typename Key, + typename HashContainer, + UInt8 small_set_size_max, + UInt8 medium_set_power2_max, + UInt8 K, + typename Hash = IntHash32, + typename DenominatorType = float +> class CombinedCardinalityEstimator { public: - using Self = CombinedCardinalityEstimator; + using Self = CombinedCardinalityEstimator; private: using Small = SmallSet; using Medium = HashContainer; - using Large = HyperLogLogWithSmallSetOptimization; + using Large = HyperLogLogCounter; public: + CombinedCardinalityEstimator() + { + setContainerType(details::ContainerType::SMALL); + } + ~CombinedCardinalityEstimator() { - if (container_type == details::ContainerType::MEDIUM) - { - delete medium; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(medium)); - } - else if (container_type == details::ContainerType::LARGE) - { - delete large; - - if (current_memory_tracker) - current_memory_tracker->free(sizeof(large)); - } + destroy(); } void insert(Key value) { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) { if (small.find(value) == small.end()) @@ -66,41 +70,43 @@ public: else { toMedium(); - medium->insert(value); + getContainer().insert(value); } } } else if (container_type == details::ContainerType::MEDIUM) { - if (medium->size() < medium_set_size_max) - medium->insert(value); + auto & container = getContainer(); + if (container.size() < medium_set_size_max) + container.insert(value); else { toLarge(); - large->insert(value); + getContainer().insert(value); } } else if (container_type == details::ContainerType::LARGE) - large->insert(value); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + getContainer().insert(value); } UInt32 size() const { + auto container_type = getContainerType(); + if (container_type == details::ContainerType::SMALL) return small.size(); else if (container_type == details::ContainerType::MEDIUM) - return medium->size(); + return getContainer().size(); else if (container_type == details::ContainerType::LARGE) - return large->size(); + return getContainer().size(); else throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void merge(const Self & rhs) { - details::ContainerType max_container_type = details::max(container_type, rhs.container_type); + auto container_type = getContainerType(); + auto max_container_type = details::max(container_type, rhs.getContainerType()); if (container_type != max_container_type) { @@ -110,41 +116,18 @@ public: toLarge(); } - if (container_type == details::ContainerType::SMALL) + if (rhs.getContainerType() == details::ContainerType::SMALL) { for (const auto & x : rhs.small) insert(x); } - else if (container_type == details::ContainerType::MEDIUM) + else if (rhs.getContainerType() == details::ContainerType::MEDIUM) { - if (rhs.container_type == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.container_type == details::ContainerType::MEDIUM) - { - for (const auto & x : *rhs.medium) - insert(x); - } + for (const auto & x : rhs.getContainer()) + insert(x); } - else if (container_type == details::ContainerType::LARGE) - { - if (rhs.container_type == details::ContainerType::SMALL) - { - for (const auto & x : rhs.small) - insert(x); - } - else if (rhs.container_type == details::ContainerType::MEDIUM) - { - for (const auto & x : *rhs.medium) - insert(x); - } - else if (rhs.container_type == details::ContainerType::LARGE) - large->merge(*rhs.large); - } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); + else if (rhs.getContainerType() == details::ContainerType::LARGE) + getContainer().merge(rhs.getContainer()); } /// Можно вызывать только для пустого объекта. @@ -152,79 +135,96 @@ public: { UInt8 v; readBinary(v, in); - details::ContainerType t = static_cast(v); + auto container_type = static_cast(v); - if (t == details::ContainerType::SMALL) + if (container_type == details::ContainerType::SMALL) small.read(in); - else if (t == details::ContainerType::MEDIUM) + else if (container_type == details::ContainerType::MEDIUM) { toMedium(); - medium->read(in); + getContainer().read(in); } - else if (t == details::ContainerType::LARGE) + else if (container_type == details::ContainerType::LARGE) { toLarge(); - large->read(in); + getContainer().read(in); } - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); } void readAndMerge(DB::ReadBuffer & in) { - Self other; - other.read(in); - merge(other); + auto container_type = getContainerType(); + + UInt8 v; + readBinary(v, in); + auto rhs_container_type = static_cast(v); + + auto max_container_type = details::max(container_type, rhs_container_type); + + if (container_type != max_container_type) + { + if (max_container_type == details::ContainerType::MEDIUM) + toMedium(); + else if (max_container_type == details::ContainerType::LARGE) + toLarge(); + } + + if (rhs_container_type == details::ContainerType::SMALL) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::MEDIUM) + { + typename Medium::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else if (rhs_container_type == details::ContainerType::LARGE) + getContainer().readAndMerge(in); } void write(DB::WriteBuffer & out) const { - UInt8 v = static_cast(container_type); - writeBinary(v, out); + auto container_type = getContainerType(); + writeBinary(static_cast(container_type), out); if (container_type == details::ContainerType::SMALL) small.write(out); else if (container_type == details::ContainerType::MEDIUM) - medium->write(out); + getContainer().write(out); else if (container_type == details::ContainerType::LARGE) - large->write(out); - else - throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - } - - bool isMedium() const - { - return container_type == details::ContainerType::MEDIUM; + getContainer().write(out); } private: void toMedium() { - if (container_type != details::ContainerType::SMALL) + if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(medium)); - - Medium * tmp_medium = new Medium; + auto tmp_medium = std::make_unique(); for (const auto & x : small) tmp_medium->insert(x); - medium = tmp_medium; + new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - container_type = details::ContainerType::MEDIUM; + setContainerType(details::ContainerType::MEDIUM); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(medium)); } void toLarge() { + auto container_type = getContainerType(); + if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - if (current_memory_tracker) - current_memory_tracker->alloc(sizeof(large)); - - Large * tmp_large = new Large; + auto tmp_large = std::make_unique(); if (container_type == details::ContainerType::SMALL) { @@ -233,30 +233,78 @@ private: } else if (container_type == details::ContainerType::MEDIUM) { - for (const auto & x : *medium) + for (const auto & x : getContainer()) tmp_large->insert(x); + + destroy(); } - large = tmp_large; + new (&large) std::unique_ptr{ std::move(tmp_large) }; + + setContainerType(details::ContainerType::LARGE); + + if (current_memory_tracker) + current_memory_tracker->alloc(sizeof(large)); + + } + + void NO_INLINE destroy() + { + auto container_type = getContainerType(); + + clearContainerType(); if (container_type == details::ContainerType::MEDIUM) { - delete medium; - medium = nullptr; - + medium.std::unique_ptr::~unique_ptr(); if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } + else if (container_type == details::ContainerType::LARGE) + { + large.std::unique_ptr::~unique_ptr(); + if (current_memory_tracker) + current_memory_tracker->free(sizeof(large)); + } + } - container_type = details::ContainerType::LARGE; + template + inline T & getContainer() + { + return *reinterpret_cast(address & mask); + } + + template + inline const T & getContainer() const + { + return *reinterpret_cast(address & mask); + } + + void setContainerType(details::ContainerType t) + { + address |= static_cast(t); + } + + inline details::ContainerType getContainerType() const + { + return static_cast(address & ~mask); + } + + void clearContainerType() + { + address &= mask; } private: Small small; - Medium * medium = nullptr; - Large * large = nullptr; - const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; - details::ContainerType container_type = details::ContainerType::SMALL; + union + { + std::unique_ptr medium; + std::unique_ptr large; + UInt64 address = 0; + }; + static const UInt64 mask = 0xFFFFFFFC; + static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; } diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 67196746ae5..0b216e1ca0e 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -251,6 +251,7 @@ class HashTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; template friend class TwoLevelHashTable; @@ -429,6 +430,51 @@ public: free(); } + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return cell.getValue(); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; class iterator { @@ -757,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } @@ -781,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(Cell::getKey(x.getValue())); + insert(x.getValue()); } } diff --git a/dbms/include/DB/Common/HashTable/SmallTable.h b/dbms/include/DB/Common/HashTable/SmallTable.h index 10ec8479b93..c68963a4798 100644 --- a/dbms/include/DB/Common/HashTable/SmallTable.h +++ b/dbms/include/DB/Common/HashTable/SmallTable.h @@ -27,6 +27,7 @@ class SmallTable : protected: friend class const_iterator; friend class iterator; + friend class Reader; typedef SmallTable Self; typedef Cell cell_type; @@ -66,6 +67,55 @@ public: typedef typename Cell::value_type value_type; + class Reader final : private Cell::State + { + public: + Reader(DB::ReadBuffer & in_) + : in(in_) + { + } + + Reader(const Reader &) = delete; + Reader & operator=(const Reader &) = delete; + + bool next() + { + if (read_count == size) + { + is_eof = true; + return false; + } + else if (read_count == 0) + { + Cell::State::read(in); + DB::readVarUInt(size, in); + + if (size > capacity) + throw DB::Exception("Illegal size"); + } + + cell.read(in); + ++read_count; + + return true; + } + + inline const value_type & get() const + { + if ((read_count == 0) || is_eof) + throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA); + + return cell.getValue(); + } + + private: + DB::ReadBuffer in; + Cell cell; + size_t read_count = 0; + size_t size; + bool is_eof = false; + }; + class iterator { Self * container; diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 7932ddfb0e8..405f7c5ca12 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -114,10 +114,20 @@ public: void readAndMerge(DB::ReadBuffer & in) { - /// Немного не оптимально. - HyperLogLogWithSmallSetOptimization other; - other.read(in); - merge(other); + bool is_rhs_large; + readBinary(is_rhs_large, in); + + if (!isLarge() && is_rhs_large) + toLarge(); + + if (!is_rhs_large) + { + typename Small::Reader reader(in); + while (reader.next()) + insert(reader.get()); + } + else + large->readAndMerge(in); } void write(DB::WriteBuffer & out) const diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 937b06d5ce0..8fae35ea601 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -283,6 +283,7 @@ namespace ErrorCodes INDEX_NOT_USED = 277, LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, + NO_AVAILABLE_DATA = 280, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference new file mode 100644 index 00000000000..64f3c19bb38 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -0,0 +1,416 @@ +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 159 +1 164 +3 165 +6 162 +7 160 +9 164 +10 81 +11 158 +13 161 +14 160 +17 163 +19 164 +20 159 +21 161 +22 159 +26 160 +31 164 +35 160 +36 161 +0 54571 +1 55013 +3 52912 +6 52353 +7 54011 +9 54138 +10 26870 +11 54554 +13 53951 +14 53396 +17 55227 +19 55115 +20 54370 +21 54268 +22 54620 +26 53394 +31 54151 +35 54328 +36 52997 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 161 +0.125 160 +0.5 164 +0.05 164 +0.143 162 +0.091 81 +0.056 163 +0.048 159 +0.083 158 +0.25 165 +1 159 +0.1 164 +0.028 160 +0.027 161 +0.031 164 +0.067 160 +0.043 159 +0.037 160 +0.071 161 +0.045 54268 +0.125 54011 +0.5 55013 +0.05 55115 +0.143 52353 +0.091 26870 +0.056 55227 +0.048 54370 +0.083 54554 +0.25 52912 +1 54571 +0.1 54138 +0.028 54328 +0.027 52997 +0.031 54151 +0.067 53396 +0.043 54620 +0.037 53394 +0.071 53951 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 164 +0.05 164 +0.25 165 +0.048 159 +0.091 81 +0.043 159 +0.071 161 +0.083 158 +0.125 160 +0.031 164 +0.143 162 +0.028 160 +0.067 160 +0.045 161 +0.027 161 +0.056 163 +0.037 160 +0.1 164 +1 159 +0.5 55013 +0.05 55115 +0.25 52912 +0.048 54370 +0.091 26870 +0.043 54620 +0.071 53951 +0.083 54554 +0.125 54011 +0.031 54151 +0.143 52353 +0.028 54328 +0.067 53396 +0.045 54268 +0.027 52997 +0.056 55227 +0.037 53394 +0.1 54138 +1 54571 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 158 +3 162 +6 163 +7 162 +9 162 +10 79 +11 162 +13 163 +14 160 +17 163 +19 158 +20 162 +21 157 +22 164 +26 162 +31 161 +35 162 +36 163 +0 54029 +1 53772 +3 53540 +6 54012 +7 53910 +9 52761 +10 26462 +11 52701 +13 54505 +14 53790 +17 54064 +19 55420 +20 56686 +21 52639 +22 54251 +26 53827 +31 53574 +35 55022 +36 53961 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54053 +6 54054 +7 54054 +9 54053 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54053 +22 54054 +26 54054 +31 54054 +35 54054 +36 54053 +0.125 1 +0.5 1 +0.05 1 +0.143 1 +0.056 1 +0.048 2 +0.083 1 +0.25 1 +0.1 1 +0.028 1 +0.027 1 +0.031 1 +0.067 1 +0.037 1 +0.045 162 +0.125 163 +0.5 162 +0.05 162 +0.143 162 +0.091 81 +0.056 162 +0.048 162 +0.083 163 +0.25 162 +1 162 +0.1 163 +0.028 162 +0.027 162 +0.031 162 +0.067 162 +0.043 162 +0.037 162 +0.071 162 +0.045 54053 +0.125 54054 +0.5 54054 +0.05 54054 +0.143 54054 +0.091 27027 +0.056 54054 +0.048 54054 +0.083 54055 +0.25 54053 +1 54054 +0.1 54053 +0.028 54054 +0.027 54053 +0.031 54054 +0.067 54054 +0.043 54054 +0.037 54054 +0.071 54054 +0.5 1 +0.05 1 +0.25 1 +0.048 2 +0.083 1 +0.125 1 +0.031 1 +0.143 1 +0.028 1 +0.067 1 +0.027 1 +0.056 1 +0.037 1 +0.1 1 +0.5 162 +0.05 162 +0.25 162 +0.048 162 +0.091 81 +0.043 162 +0.071 162 +0.083 163 +0.125 163 +0.031 162 +0.143 162 +0.028 162 +0.067 162 +0.045 162 +0.027 162 +0.056 162 +0.037 162 +0.1 163 +1 162 +0.5 54054 +0.05 54054 +0.25 54053 +0.048 54054 +0.091 27027 +0.043 54054 +0.071 54054 +0.083 54055 +0.125 54054 +0.031 54054 +0.143 54054 +0.028 54054 +0.067 54054 +0.045 54053 +0.027 54053 +0.056 54054 +0.037 54054 +0.1 54053 +1 54054 +1 1 +3 1 +6 1 +7 1 +9 1 +11 1 +14 1 +17 1 +19 1 +20 2 +26 1 +31 1 +35 1 +36 1 +0 162 +1 162 +3 162 +6 162 +7 163 +9 163 +10 81 +11 163 +13 162 +14 162 +17 162 +19 162 +20 162 +21 162 +22 162 +26 162 +31 162 +35 162 +36 162 +0 54054 +1 54054 +3 54054 +6 54054 +7 54054 +9 54054 +10 27027 +11 54055 +13 54054 +14 54054 +17 54054 +19 54054 +20 54054 +21 54054 +22 54054 +26 54054 +31 54054 +35 54054 +36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql new file mode 100644 index 00000000000..2886daeb3b3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql @@ -0,0 +1,35 @@ +/* uniqHLL12 */ + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +/* uniqCombined */ + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; + +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; +SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From a68b3891f872fb1ca5d2e1fcb68e2e15b63ac9b2 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 19:59:15 +0300 Subject: [PATCH 30/43] dbms: Server: Updated functional tests. [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 152 +++++++++--------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference index 64f3c19bb38..288258c7d81 100644 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference @@ -239,25 +239,25 @@ 31 162 35 162 36 162 -0 54054 -1 54054 -3 54053 -6 54054 -7 54054 -9 54053 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54053 -22 54054 -26 54054 -31 54054 -35 54054 -36 54053 +0 53988 +1 54083 +3 53994 +6 53948 +7 54209 +9 54112 +10 27000 +11 54058 +13 54158 +14 53926 +17 54094 +19 54127 +20 54065 +21 54207 +22 54056 +26 53982 +31 54156 +35 53960 +36 54076 0.125 1 0.5 1 0.05 1 @@ -291,25 +291,25 @@ 0.043 162 0.037 162 0.071 162 -0.045 54053 -0.125 54054 -0.5 54054 -0.05 54054 -0.143 54054 -0.091 27027 -0.056 54054 -0.048 54054 -0.083 54055 -0.25 54053 -1 54054 -0.1 54053 -0.028 54054 -0.027 54053 -0.031 54054 -0.067 54054 -0.043 54054 -0.037 54054 -0.071 54054 +0.045 54207 +0.125 54209 +0.5 54083 +0.05 54127 +0.143 53948 +0.091 27000 +0.056 54094 +0.048 54065 +0.083 54058 +0.25 53994 +1 53988 +0.1 54112 +0.028 53960 +0.027 54076 +0.031 54156 +0.067 53926 +0.043 54056 +0.037 53982 +0.071 54158 0.5 1 0.05 1 0.25 1 @@ -343,25 +343,25 @@ 0.037 162 0.1 163 1 162 -0.5 54054 -0.05 54054 -0.25 54053 -0.048 54054 -0.091 27027 -0.043 54054 -0.071 54054 -0.083 54055 -0.125 54054 -0.031 54054 -0.143 54054 -0.028 54054 -0.067 54054 -0.045 54053 -0.027 54053 -0.056 54054 -0.037 54054 -0.1 54053 -1 54054 +0.5 54083 +0.05 54127 +0.25 53994 +0.048 54065 +0.091 27000 +0.043 54056 +0.071 54158 +0.083 54058 +0.125 54209 +0.031 54156 +0.143 53948 +0.028 53960 +0.067 53926 +0.045 54207 +0.027 54076 +0.056 54094 +0.037 53982 +0.1 54112 +1 53988 1 1 3 1 6 1 @@ -395,22 +395,22 @@ 31 162 35 162 36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 +0 54195 +1 54086 +3 54127 +6 54173 +7 53969 +9 54257 +10 26985 +11 53880 +13 54105 +14 54043 +17 54176 +19 53913 +20 54088 +21 53991 +22 54112 +26 54136 +31 54074 +35 54153 +36 53999 From 1bf22c463b6ce99c66b08bcf273e279e93088e45 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 20:09:02 +0300 Subject: [PATCH 31/43] dbms: Server: Renamed functional test. [#METR-17276] --- ...ion_uniq.reference => 00212_aggregate_function_uniq.referemce} | 0 ...regate_function_uniq.sql => 00212_aggregate_function_uniq.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename dbms/tests/queries/0_stateless/{00211_aggregate_function_uniq.reference => 00212_aggregate_function_uniq.referemce} (100%) rename dbms/tests/queries/0_stateless/{00211_aggregate_function_uniq.sql => 00212_aggregate_function_uniq.sql} (100%) diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce similarity index 100% rename from dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.sql similarity index 100% rename from dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.sql From ed2ec39899544eb9cc8e4d5fd42048130612fcdb Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Fri, 7 Aug 2015 20:35:34 +0300 Subject: [PATCH 32/43] dbms: Server: Fixes. [#METR-17276] --- dbms/include/DB/Common/HashTable/HashTable.h | 4 ++-- ...uniq.referemce => 00212_aggregate_function_uniq.reference} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename dbms/tests/queries/0_stateless/{00212_aggregate_function_uniq.referemce => 00212_aggregate_function_uniq.reference} (100%) diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h index 0b216e1ca0e..02c157035a8 100644 --- a/dbms/include/DB/Common/HashTable/HashTable.h +++ b/dbms/include/DB/Common/HashTable/HashTable.h @@ -803,7 +803,7 @@ public: { Cell x; x.read(rb); - insert(x.getValue()); + insert(x); } } @@ -827,7 +827,7 @@ public: Cell x; DB::assertString(",", rb); x.readText(rb); - insert(x.getValue()); + insert(x); } } diff --git a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference similarity index 100% rename from dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.referemce rename to dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference From 071c1af212082e5fb5e05e5a12f501448d06b406 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Aug 2015 08:10:43 +0300 Subject: [PATCH 33/43] dbms: fixed error [#METR-17606]. --- dbms/include/DB/Parsers/ASTCreateQuery.h | 7 ++++--- dbms/include/DB/Parsers/ASTJoin.h | 4 +++- dbms/include/DB/Parsers/ASTSubquery.h | 7 ++++--- dbms/include/DB/Parsers/IAST.h | 2 +- dbms/src/Parsers/ASTFunction.cpp | 2 -- dbms/src/Parsers/ASTSelectQuery.cpp | 4 +++- 6 files changed, 15 insertions(+), 11 deletions(-) diff --git a/dbms/include/DB/Parsers/ASTCreateQuery.h b/dbms/include/DB/Parsers/ASTCreateQuery.h index 57cfc1a35b2..b76cfedc2d2 100644 --- a/dbms/include/DB/Parsers/ASTCreateQuery.h +++ b/dbms/include/DB/Parsers/ASTCreateQuery.h @@ -30,7 +30,7 @@ public: ASTCreateQuery() = default; ASTCreateQuery(const StringRange range_) : IAST(range_) {} - + /** Получить текст, который идентифицирует этот элемент. */ String getID() const override { return (attach ? "AttachQuery_" : "CreateQuery_") + database + "_" + table; }; @@ -87,8 +87,9 @@ protected: if (columns) { settings.ostr << (settings.one_line ? " (" : "\n("); - ++frame.indent; - columns->formatImpl(settings, state, frame); + FormatStateStacked frame_nested = frame; + ++frame_nested.indent; + columns->formatImpl(settings, state, frame_nested); settings.ostr << (settings.one_line ? ")" : "\n)"); } diff --git a/dbms/include/DB/Parsers/ASTJoin.h b/dbms/include/DB/Parsers/ASTJoin.h index 029d4f49350..859b7b3cfcf 100644 --- a/dbms/include/DB/Parsers/ASTJoin.h +++ b/dbms/include/DB/Parsers/ASTJoin.h @@ -106,7 +106,9 @@ protected: settings.ostr << "JOIN " << (settings.hilite ? hilite_none : ""); - table->formatImpl(settings, state, frame); + FormatStateStacked frame_with_indent = frame; + ++frame_with_indent.indent; + table->formatImpl(settings, state, frame_with_indent); if (kind != ASTJoin::Cross) { diff --git a/dbms/include/DB/Parsers/ASTSubquery.h b/dbms/include/DB/Parsers/ASTSubquery.h index 7447a47350d..55d7621129f 100644 --- a/dbms/include/DB/Parsers/ASTSubquery.h +++ b/dbms/include/DB/Parsers/ASTSubquery.h @@ -42,9 +42,10 @@ protected: std::string nl_or_nothing = settings.one_line ? "" : "\n"; settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; - FormatStateStacked frame_dont_need_parens = frame; - frame_dont_need_parens.need_parens = false; - children[0]->formatImpl(settings, state, frame_dont_need_parens); + FormatStateStacked frame_nested = frame; + frame_nested.need_parens = false; + ++frame_nested.indent; + children[0]->formatImpl(settings, state, frame_nested); settings.ostr << nl_or_nothing << indent_str << ")"; } }; diff --git a/dbms/include/DB/Parsers/IAST.h b/dbms/include/DB/Parsers/IAST.h index 2f3bc41db60..e92b0877c7a 100644 --- a/dbms/include/DB/Parsers/IAST.h +++ b/dbms/include/DB/Parsers/IAST.h @@ -164,7 +164,7 @@ public: /// Состояние, которое копируется при форматировании каждого узла. Например, уровень вложенности. struct FormatStateStacked { - bool indent = 0; + UInt8 indent = 0; bool need_parens = false; const IAST * current_select = nullptr; }; diff --git a/dbms/src/Parsers/ASTFunction.cpp b/dbms/src/Parsers/ASTFunction.cpp index f014f13a6ef..8a4b1083bc2 100644 --- a/dbms/src/Parsers/ASTFunction.cpp +++ b/dbms/src/Parsers/ASTFunction.cpp @@ -39,8 +39,6 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (name == "negate" && typeid_cast(&*arguments->children[0])) settings.ostr << ' '; - FormatStateStacked nested_frame; - nested_frame.need_parens = true; arguments->formatImpl(settings, state, nested_need_parens); written = true; } diff --git a/dbms/src/Parsers/ASTSelectQuery.cpp b/dbms/src/Parsers/ASTSelectQuery.cpp index 35c4214e75c..fbb22ad0dd0 100644 --- a/dbms/src/Parsers/ASTSelectQuery.cpp +++ b/dbms/src/Parsers/ASTSelectQuery.cpp @@ -248,7 +248,9 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F else s.ostr << "\n" << indent_str << "(\n"; - table->formatImpl(s, state, frame); + FormatStateStacked frame_with_indent = frame; + ++frame_with_indent.indent; + table->formatImpl(s, state, frame_with_indent); if (s.one_line) s.ostr << ")"; From 753a90b9302cac0f3e8e87c0009dcf623c5d5e50 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 10 Aug 2015 13:05:03 +0300 Subject: [PATCH 34/43] dbms: Server: Deleted obsolete files. [#METR-17276] --- .../00211_aggregate_function_uniq.reference | 416 ------------------ .../00211_aggregate_function_uniq.sql | 35 -- 2 files changed, 451 deletions(-) delete mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference delete mode 100644 dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference deleted file mode 100644 index 64f3c19bb38..00000000000 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.reference +++ /dev/null @@ -1,416 +0,0 @@ -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 159 -1 164 -3 165 -6 162 -7 160 -9 164 -10 81 -11 158 -13 161 -14 160 -17 163 -19 164 -20 159 -21 161 -22 159 -26 160 -31 164 -35 160 -36 161 -0 54571 -1 55013 -3 52912 -6 52353 -7 54011 -9 54138 -10 26870 -11 54554 -13 53951 -14 53396 -17 55227 -19 55115 -20 54370 -21 54268 -22 54620 -26 53394 -31 54151 -35 54328 -36 52997 -0.125 1 -0.5 1 -0.05 1 -0.143 1 -0.056 1 -0.048 2 -0.083 1 -0.25 1 -0.1 1 -0.028 1 -0.027 1 -0.031 1 -0.067 1 -0.037 1 -0.045 161 -0.125 160 -0.5 164 -0.05 164 -0.143 162 -0.091 81 -0.056 163 -0.048 159 -0.083 158 -0.25 165 -1 159 -0.1 164 -0.028 160 -0.027 161 -0.031 164 -0.067 160 -0.043 159 -0.037 160 -0.071 161 -0.045 54268 -0.125 54011 -0.5 55013 -0.05 55115 -0.143 52353 -0.091 26870 -0.056 55227 -0.048 54370 -0.083 54554 -0.25 52912 -1 54571 -0.1 54138 -0.028 54328 -0.027 52997 -0.031 54151 -0.067 53396 -0.043 54620 -0.037 53394 -0.071 53951 -0.5 1 -0.05 1 -0.25 1 -0.048 2 -0.083 1 -0.125 1 -0.031 1 -0.143 1 -0.028 1 -0.067 1 -0.027 1 -0.056 1 -0.037 1 -0.1 1 -0.5 164 -0.05 164 -0.25 165 -0.048 159 -0.091 81 -0.043 159 -0.071 161 -0.083 158 -0.125 160 -0.031 164 -0.143 162 -0.028 160 -0.067 160 -0.045 161 -0.027 161 -0.056 163 -0.037 160 -0.1 164 -1 159 -0.5 55013 -0.05 55115 -0.25 52912 -0.048 54370 -0.091 26870 -0.043 54620 -0.071 53951 -0.083 54554 -0.125 54011 -0.031 54151 -0.143 52353 -0.028 54328 -0.067 53396 -0.045 54268 -0.027 52997 -0.056 55227 -0.037 53394 -0.1 54138 -1 54571 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 158 -3 162 -6 163 -7 162 -9 162 -10 79 -11 162 -13 163 -14 160 -17 163 -19 158 -20 162 -21 157 -22 164 -26 162 -31 161 -35 162 -36 163 -0 54029 -1 53772 -3 53540 -6 54012 -7 53910 -9 52761 -10 26462 -11 52701 -13 54505 -14 53790 -17 54064 -19 55420 -20 56686 -21 52639 -22 54251 -26 53827 -31 53574 -35 55022 -36 53961 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 162 -3 162 -6 162 -7 163 -9 163 -10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 -26 162 -31 162 -35 162 -36 162 -0 54054 -1 54054 -3 54053 -6 54054 -7 54054 -9 54053 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54053 -22 54054 -26 54054 -31 54054 -35 54054 -36 54053 -0.125 1 -0.5 1 -0.05 1 -0.143 1 -0.056 1 -0.048 2 -0.083 1 -0.25 1 -0.1 1 -0.028 1 -0.027 1 -0.031 1 -0.067 1 -0.037 1 -0.045 162 -0.125 163 -0.5 162 -0.05 162 -0.143 162 -0.091 81 -0.056 162 -0.048 162 -0.083 163 -0.25 162 -1 162 -0.1 163 -0.028 162 -0.027 162 -0.031 162 -0.067 162 -0.043 162 -0.037 162 -0.071 162 -0.045 54053 -0.125 54054 -0.5 54054 -0.05 54054 -0.143 54054 -0.091 27027 -0.056 54054 -0.048 54054 -0.083 54055 -0.25 54053 -1 54054 -0.1 54053 -0.028 54054 -0.027 54053 -0.031 54054 -0.067 54054 -0.043 54054 -0.037 54054 -0.071 54054 -0.5 1 -0.05 1 -0.25 1 -0.048 2 -0.083 1 -0.125 1 -0.031 1 -0.143 1 -0.028 1 -0.067 1 -0.027 1 -0.056 1 -0.037 1 -0.1 1 -0.5 162 -0.05 162 -0.25 162 -0.048 162 -0.091 81 -0.043 162 -0.071 162 -0.083 163 -0.125 163 -0.031 162 -0.143 162 -0.028 162 -0.067 162 -0.045 162 -0.027 162 -0.056 162 -0.037 162 -0.1 163 -1 162 -0.5 54054 -0.05 54054 -0.25 54053 -0.048 54054 -0.091 27027 -0.043 54054 -0.071 54054 -0.083 54055 -0.125 54054 -0.031 54054 -0.143 54054 -0.028 54054 -0.067 54054 -0.045 54053 -0.027 54053 -0.056 54054 -0.037 54054 -0.1 54053 -1 54054 -1 1 -3 1 -6 1 -7 1 -9 1 -11 1 -14 1 -17 1 -19 1 -20 2 -26 1 -31 1 -35 1 -36 1 -0 162 -1 162 -3 162 -6 162 -7 163 -9 163 -10 81 -11 163 -13 162 -14 162 -17 162 -19 162 -20 162 -21 162 -22 162 -26 162 -31 162 -35 162 -36 162 -0 54054 -1 54054 -3 54054 -6 54054 -7 54054 -9 54054 -10 27027 -11 54055 -13 54054 -14 54054 -17 54054 -19 54054 -20 54054 -21 54054 -22 54054 -26 54054 -31 54054 -35 54054 -36 54054 diff --git a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql b/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql deleted file mode 100644 index 2886daeb3b3..00000000000 --- a/dbms/tests/queries/0_stateless/00211_aggregate_function_uniq.sql +++ /dev/null @@ -1,35 +0,0 @@ -/* uniqHLL12 */ - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqHLL12(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -/* uniqCombined */ - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(1/(1 + (3*X*X - 7*X + 11) % 37), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(X) FROM (SELECT number AS X, round(toFloat32(1/(1 + (3*X*X - 7*X + 11) % 37)), 3) AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; - -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 15) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 3000) GROUP BY Y; -SELECT Y, uniqCombined(Z) FROM (SELECT number AS X, IPv4NumToString(toUInt32(X)) AS Z, (3*X*X - 7*X + 11) % 37 AS Y FROM system.numbers LIMIT 1000000) GROUP BY Y; From 99f0783b04895cdb02601f90d31704541c549d44 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 10 Aug 2015 16:47:43 +0300 Subject: [PATCH 35/43] dbms: Server: Fixed pointer computation + simplified code. [#METR-17276] --- .../DB/Common/CombinedCardinalityEstimator.h | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 00a01232b31..4e8669c0af1 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -209,8 +209,7 @@ private: for (const auto & x : small) tmp_medium->insert(x); - new (&medium) std::unique_ptr{ std::move(tmp_medium) }; - + medium = tmp_medium.release(); setContainerType(details::ContainerType::MEDIUM); if (current_memory_tracker) @@ -239,8 +238,7 @@ private: destroy(); } - new (&large) std::unique_ptr{ std::move(tmp_large) }; - + large = tmp_large.release(); setContainerType(details::ContainerType::LARGE); if (current_memory_tracker) @@ -256,13 +254,17 @@ private: if (container_type == details::ContainerType::MEDIUM) { - medium.std::unique_ptr::~unique_ptr(); + delete medium; + medium = nullptr; + if (current_memory_tracker) current_memory_tracker->free(sizeof(medium)); } else if (container_type == details::ContainerType::LARGE) { - large.std::unique_ptr::~unique_ptr(); + delete large; + large = nullptr; + if (current_memory_tracker) current_memory_tracker->free(sizeof(large)); } @@ -282,6 +284,7 @@ private: void setContainerType(details::ContainerType t) { + address &= mask; address |= static_cast(t); } @@ -299,11 +302,11 @@ private: Small small; union { - std::unique_ptr medium; - std::unique_ptr large; + Medium * medium; + Large * large; UInt64 address = 0; }; - static const UInt64 mask = 0xFFFFFFFC; + static const UInt64 mask = 0xFFFFFFFFFFFFFFFC; static const UInt32 medium_set_size_max = 1UL << medium_set_power2_max; }; From 9e1486bfdeed3b29c3d7101af7aeef425ccda613 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Tue, 11 Aug 2015 20:31:31 +0300 Subject: [PATCH 36/43] =?UTF-8?q?dbms:=20Server:=20Fixed=20interaction=20w?= =?UTF-8?q?ith=20=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88?= =?UTF-8?q?=E2=96=88=E2=96=88=E2=96=88=E2=96=88=E2=96=88.=20[#METR-17276]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dbms/include/DB/Common/CombinedCardinalityEstimator.h | 2 +- dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Common/CombinedCardinalityEstimator.h b/dbms/include/DB/Common/CombinedCardinalityEstimator.h index 4e8669c0af1..c6a2ff2d119 100644 --- a/dbms/include/DB/Common/CombinedCardinalityEstimator.h +++ b/dbms/include/DB/Common/CombinedCardinalityEstimator.h @@ -34,7 +34,7 @@ template UInt8 medium_set_power2_max, UInt8 K, typename Hash = IntHash32, - typename DenominatorType = float + typename DenominatorType = double > class CombinedCardinalityEstimator { diff --git a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h index 405f7c5ca12..e95811ce27b 100644 --- a/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/include/DB/Common/HyperLogLogWithSmallSetOptimization.h @@ -16,7 +16,7 @@ template < UInt8 small_set_size, UInt8 K, typename Hash = IntHash32, - typename DenominatorType = float> + typename DenominatorType = double> class HyperLogLogWithSmallSetOptimization { private: From 6448560938c39c615f4c67203127bc343990e98c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:11:54 +0300 Subject: [PATCH 37/43] dbms: allowed to specify 'interserver_http_host' in metrika.xml for metrika package [#MTRSADMIN-1483]. --- dbms/src/Server/Server.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 3c2e42a5ccb..a2bc499db88 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -485,15 +485,12 @@ int Server::main(const std::vector & args) if (config().has("interserver_http_port")) { - String this_host; - if (config().has("interserver_http_host")) - { - this_host = config().getString("interserver_http_host"); - } - else + String this_host = config().getString("interserver_http_host", ""); + + if (this_host.empty()) { this_host = getFQDNOrHostName(); - LOG_DEBUG(log, "Configuration parameter 'interserver_http_host' doesn't exist. Will use '" + this_host + "' as replica host."); + LOG_DEBUG(log, "Configuration parameter 'interserver_http_host' doesn't exist or exists and empty. Will use '" + this_host + "' as replica host."); } String port_str = config().getString("interserver_http_port"); From 8d8fa9d0fdea89e69d9fe8f0a0eb9bb22178d724 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:29:44 +0300 Subject: [PATCH 38/43] dbms: more logging in MySQLDictionarySource [#METR-17508]. --- dbms/include/DB/Dictionaries/MySQLDictionarySource.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index c0ee3974d49..e791246c35c 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -65,6 +65,8 @@ public: } private: + Logger * log = &Logger::get("MySQLDictionarySource"); + mysqlxx::DateTime getLastModification() const { const auto Update_time_idx = 12; @@ -74,6 +76,9 @@ private: { auto connection = pool.Get(); auto query = connection->query("SHOW TABLE STATUS LIKE '%" + strconvert::escaped_for_like(table) + "%';"); + + LOG_TRACE(log, query.str()); + auto result = query.use(); if (auto row = result.fetch()) @@ -144,6 +149,8 @@ private: writeChar(';', out); } + LOG_TRACE(log, query); + return query; } From f39ad593f595fdb6902171239bde44b1cf67057b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 00:32:27 +0300 Subject: [PATCH 39/43] dbms: added optional property 'require_nonempty' for external dictionaries [#METR-17508]. --- dbms/include/DB/Core/ErrorCodes.h | 1 + dbms/include/DB/Dictionaries/FlatDictionary.h | 11 ++++++++--- .../DB/Dictionaries/HashedDictionary.h | 11 ++++++++--- .../DB/Dictionaries/RangeHashedDictionary.h | 11 ++++++++--- dbms/src/Interpreters/DictionaryFactory.cpp | 19 ++++++++++++------- 5 files changed, 37 insertions(+), 16 deletions(-) diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index 8fae35ea601..3336d80fcf4 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -284,6 +284,7 @@ namespace ErrorCodes LEADERSHIP_LOST = 278, ALL_CONNECTION_TRIES_FAILED = 279, NO_AVAILABLE_DATA = 280, + DICTIONARY_IS_EMPTY = 281, KEEPER_EXCEPTION = 999, POCO_EXCEPTION = 1000, diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index 1ae5b976fde..b1267bc618f 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -20,9 +20,10 @@ class FlatDictionary final : public IDictionary { public: FlatDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -40,7 +41,7 @@ public: } FlatDictionary(const FlatDictionary & other) - : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : FlatDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -198,6 +199,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -348,6 +352,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index 08aad57d63a..e356808fec5 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -18,9 +18,10 @@ class HashedDictionary final : public IDictionary { public: HashedDictionary(const std::string & name, const DictionaryStructure & dict_struct, - DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime) + DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -38,7 +39,7 @@ public: } HashedDictionary(const HashedDictionary & other) - : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : HashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -196,6 +197,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -334,6 +338,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h index 0435baecc93..a00208c5b48 100644 --- a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h +++ b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h @@ -19,9 +19,10 @@ class RangeHashedDictionary final : public IDictionaryBase public: RangeHashedDictionary( const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, - const DictionaryLifetime dict_lifetime) + const DictionaryLifetime dict_lifetime, bool require_nonempty) : name{name}, dict_struct(dict_struct), - source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime) + source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), + require_nonempty(require_nonempty) { createAttributes(); @@ -39,7 +40,7 @@ public: } RangeHashedDictionary(const RangeHashedDictionary & other) - : RangeHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime} + : RangeHashedDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty} {} std::exception_ptr getCreationException() const override { return creation_exception; } @@ -218,6 +219,9 @@ private: } stream->readSuffix(); + + if (require_nonempty && 0 == element_count) + throw Exception("Dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY); } template @@ -410,6 +414,7 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; + const bool require_nonempty; std::map attribute_index_by_name; std::vector attributes; diff --git a/dbms/src/Interpreters/DictionaryFactory.cpp b/dbms/src/Interpreters/DictionaryFactory.cpp index 23a434c6eef..688fe114f20 100644 --- a/dbms/src/Interpreters/DictionaryFactory.cpp +++ b/dbms/src/Interpreters/DictionaryFactory.cpp @@ -31,6 +31,8 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + const auto & layout_type = keys.front(); if ("range_hashed" == layout_type) @@ -41,7 +43,7 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab ErrorCodes::BAD_ARGUMENTS }; - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else { @@ -49,16 +51,15 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab throw Exception{ "Elements .structure.range_min and .structure.range_max should be defined only " "for a dictionary of layout 'range_hashed'", - ErrorCodes::BAD_ARGUMENTS - }; + ErrorCodes::BAD_ARGUMENTS}; if ("flat" == layout_type) { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else if ("hashed" == layout_type) { - return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime); + return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty); } else if ("cache" == layout_type) { @@ -66,8 +67,12 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab if (size == 0) throw Exception{ "Dictionary of layout 'cache' cannot have 0 cells", - ErrorCodes::TOO_SMALL_BUFFER_SIZE - }; + ErrorCodes::TOO_SMALL_BUFFER_SIZE}; + + if (require_nonempty) + throw Exception{ + "Dictionary of layout 'cache' cannot have 'require_nonempty' attribute set", + ErrorCodes::BAD_ARGUMENTS}; return std::make_unique(name, dict_struct, std::move(source_ptr), dict_lifetime, size); } From 0d6f4ffae1af41ee0f3e01424e9c85f32a87851e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 04:18:30 +0300 Subject: [PATCH 40/43] dbms: fixed error with dictionaries [#METR-17666]. --- dbms/include/DB/Functions/FunctionsDictionaries.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsDictionaries.h b/dbms/include/DB/Functions/FunctionsDictionaries.h index d49a8ed8180..cf7eaea60ff 100644 --- a/dbms/include/DB/Functions/FunctionsDictionaries.h +++ b/dbms/include/DB/Functions/FunctionsDictionaries.h @@ -862,7 +862,7 @@ private: dict->getString(attr_name, ids, out.get()); block.getByPosition(result).column = new ColumnConst{ - id_col->size(), out->getDataAtWithTerminatingZero(0).toString() + id_col->size(), out->getDataAt(0).toString() }; } else @@ -967,7 +967,7 @@ private: dictionary->getString(attr_name, ids, dates, out.get()); block.getByPosition(result).column = new ColumnConst{ - id_col->size(), out->getDataAtWithTerminatingZero(0).toString() + id_col->size(), out->getDataAt(0).toString() }; } else From dea3c8b8a4d84dd482e29d1f9bbc18bec7a48f4f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 06:57:32 +0300 Subject: [PATCH 41/43] dbms: external dictionaries: fixed bunch of errors; added optional parameter 'dont_check_update_time' for MySQL dictionary source [#METR-17508]. --- dbms/include/DB/Core/Exception.h | 4 +- .../DB/Dictionaries/FileDictionarySource.h | 3 +- .../DB/Dictionaries/MySQLDictionarySource.h | 46 +++++++++++---- .../DB/Interpreters/ExternalDictionaries.h | 12 +++- dbms/src/Core/Exception.cpp | 8 +-- .../src/Interpreters/ExternalDictionaries.cpp | 59 ++++++------------- 6 files changed, 70 insertions(+), 62 deletions(-) diff --git a/dbms/include/DB/Core/Exception.h b/dbms/include/DB/Core/Exception.h index 3d28e412137..e9d8ec3c737 100644 --- a/dbms/include/DB/Core/Exception.h +++ b/dbms/include/DB/Core/Exception.h @@ -28,8 +28,8 @@ ExceptionPtr cloneCurrentException(); /** Попробовать записать исключение в лог (и забыть про него). * Можно использовать в деструкторах в блоке catch (...). */ -void tryLogCurrentException(const char * log_name); -void tryLogCurrentException(Poco::Logger * logger); +void tryLogCurrentException(const char * log_name, const std::string & start_of_message = ""); +void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message = ""); std::string getCurrentExceptionMessage(bool with_stacktrace); diff --git a/dbms/include/DB/Dictionaries/FileDictionarySource.h b/dbms/include/DB/Dictionaries/FileDictionarySource.h index 41e55f64de5..a2f9b0d3c9a 100644 --- a/dbms/include/DB/Dictionaries/FileDictionarySource.h +++ b/dbms/include/DB/Dictionaries/FileDictionarySource.h @@ -20,8 +20,7 @@ class FileDictionarySource final : public IDictionarySource public: FileDictionarySource(const std::string & filename, const std::string & format, Block & sample_block, const Context & context) - : filename{filename}, format{format}, sample_block{sample_block}, context(context), - last_modification{getLastModification()} + : filename{filename}, format{format}, sample_block{sample_block}, context(context) {} FileDictionarySource(const FileDictionarySource & other) diff --git a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h index e791246c35c..abf58b2767f 100644 --- a/dbms/include/DB/Dictionaries/MySQLDictionarySource.h +++ b/dbms/include/DB/Dictionaries/MySQLDictionarySource.h @@ -23,10 +23,10 @@ public: db{config.getString(config_prefix + ".db", "")}, table{config.getString(config_prefix + ".table")}, where{config.getString(config_prefix + ".where", "")}, + dont_check_update_time{config.getBool(config_prefix + ".dont_check_update_time", false)}, sample_block{sample_block}, pool{config, config_prefix}, - load_all_query{composeLoadAllQuery()}, - last_modification{getLastModification()} + load_all_query{composeLoadAllQuery()} {} /// copy-constructor is provided in order to support cloneability @@ -35,6 +35,7 @@ public: db{other.db}, table{other.table}, where{other.where}, + dont_check_update_time{other.dont_check_update_time}, sample_block{other.sample_block}, pool{other.pool}, load_all_query{other.load_all_query}, last_modification{other.last_modification} @@ -43,18 +44,27 @@ public: BlockInputStreamPtr loadAll() override { last_modification = getLastModification(); + + LOG_TRACE(log, load_all_query); return new MySQLBlockInputStream{pool.Get(), load_all_query, sample_block, max_block_size}; } BlockInputStreamPtr loadIds(const std::vector & ids) override { - last_modification = getLastModification(); - const auto query = composeLoadIdsQuery(ids); + /// Здесь не логгируем и не обновляем время модификации, так как запрос может быть большим, и часто задаваться. + const auto query = composeLoadIdsQuery(ids); return new MySQLBlockInputStream{pool.Get(), query, sample_block, max_block_size}; } - bool isModified() const override { return getLastModification() > last_modification; } + bool isModified() const override + { + if (dont_check_update_time) + return true; + + return getLastModification() > last_modification; + } + bool supportsSelectiveLoad() const override { return true; } DictionarySourcePtr clone() const override { return std::make_unique(*this); } @@ -69,28 +79,43 @@ private: mysqlxx::DateTime getLastModification() const { - const auto Update_time_idx = 12; mysqlxx::DateTime update_time{std::time(nullptr)}; + if (dont_check_update_time) + return update_time; + try { auto connection = pool.Get(); - auto query = connection->query("SHOW TABLE STATUS LIKE '%" + strconvert::escaped_for_like(table) + "%';"); + auto query = connection->query("SHOW TABLE STATUS LIKE '" + strconvert::escaped_for_like(table) + "'"); LOG_TRACE(log, query.str()); auto result = query.use(); + size_t fetched_rows = 0; if (auto row = result.fetch()) { - const auto & update_time_value = row[Update_time_idx]; + ++fetched_rows; + const auto UPDATE_TIME_IDX = 12; + const auto & update_time_value = row[UPDATE_TIME_IDX]; if (!update_time_value.isNull()) + { update_time = update_time_value.getDateTime(); + LOG_TRACE(log, "Got update time: " << update_time); + } /// fetch remaining rows to avoid "commands out of sync" error - while (auto row = result.fetch()); + while (auto row = result.fetch()) + ++fetched_rows; } + + if (0 == fetched_rows) + LOG_ERROR(log, "Cannot find table in SHOW TABLE STATUS result."); + + if (fetched_rows > 1) + LOG_ERROR(log, "Found more than one table in SHOW TABLE STATUS result."); } catch (...) { @@ -149,8 +174,6 @@ private: writeChar(';', out); } - LOG_TRACE(log, query); - return query; } @@ -216,6 +239,7 @@ private: const std::string db; const std::string table; const std::string where; + const bool dont_check_update_time; Block sample_block; mutable mysqlxx::PoolWithFailover pool; const std::string load_all_query; diff --git a/dbms/include/DB/Interpreters/ExternalDictionaries.h b/dbms/include/DB/Interpreters/ExternalDictionaries.h index f3747a52cd6..cf2fbe36f10 100644 --- a/dbms/include/DB/Interpreters/ExternalDictionaries.h +++ b/dbms/include/DB/Interpreters/ExternalDictionaries.h @@ -57,9 +57,19 @@ private: std::uint64_t error_count; }; + /** Имя словаря -> словарь. + */ std::unordered_map dictionaries; - std::unordered_map update_times; + + /** Здесь находятся словари, которых ещё ни разу не удалось загрузить. + * В dictionaries они тоже присутствуют, но с нулевым указателем dict. + */ std::unordered_map failed_dictionaries; + + /** И для обычных и для failed_dictionaries. + */ + std::unordered_map update_times; + std::mt19937_64 rnd_engine{getSeed()}; Context & context; diff --git a/dbms/src/Core/Exception.cpp b/dbms/src/Core/Exception.cpp index 789af7beeae..6c2c52baa72 100644 --- a/dbms/src/Core/Exception.cpp +++ b/dbms/src/Core/Exception.cpp @@ -52,16 +52,16 @@ inline std::string demangle(const char * const mangled, int & status) return demangled; } -void tryLogCurrentException(const char * log_name) +void tryLogCurrentException(const char * log_name, const std::string & start_of_message) { - tryLogCurrentException(&Logger::get(log_name)); + tryLogCurrentException(&Logger::get(log_name), start_of_message); } -void tryLogCurrentException(Poco::Logger * logger) +void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_message) { try { - LOG_ERROR(logger, getCurrentExceptionMessage(true)); + LOG_ERROR(logger, start_of_message << (start_of_message.empty() ? "" : ": ") << getCurrentExceptionMessage(true)); } catch (...) { diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 0cdb6713c7f..4a84afa97d1 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -64,7 +64,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) try { auto dict_ptr = failed_dictionary.second.dict->clone(); - if (dict_ptr->getCreationException()) + if (const auto exception_ptr = dict_ptr->getCreationException()) { /// recalculate next attempt time std::uniform_int_distribution distribution( @@ -72,10 +72,11 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) failed_dictionary.second.next_attempt_time = std::chrono::system_clock::now() + std::chrono::seconds{ - std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine)) - }; + std::min(backoff_max_sec, backoff_initial_sec + distribution(rnd_engine))}; ++failed_dictionary.second.error_count; + + std::rethrow_exception(exception_ptr); } else { @@ -99,7 +100,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) } catch (...) { - LOG_ERROR(log, "Failed reloading " << name << " dictionary due to unexpected error"); + tryLogCurrentException(log, "Failed reloading '" + name + "' dictionary"); } } @@ -114,6 +115,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) try { + /// Если словарь не удалось ни разу загрузить или даже не удалось инициализировать из конфига. if (!dictionary.second.dict) continue; @@ -144,6 +146,10 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { /// create new version of dictionary auto new_version = current->clone(); + + if (const auto exception_ptr = new_version->getCreationException()) + std::rethrow_exception(exception_ptr); + dictionary.second.dict->set(new_version.release()); } } @@ -155,25 +161,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { dictionary.second.exception = std::current_exception(); - try - { - throw; - } - catch (const Poco::Exception & e) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually. " << e.displayText()); - } - catch (const std::exception & e) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually. " << e.what()); - } - catch (...) - { - LOG_ERROR(log, "Cannot update external dictionary '" << name - << "'! You must resolve this manually."); - } + tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version."); } } } @@ -235,6 +223,8 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const throw std::runtime_error{"Overriding dictionary from file " + dict_it->second.origin}; auto dict_ptr = DictionaryFactory::instance().create(name, *config, key, context); + + /// Если словарь не удалось загрузить. if (const auto exception_ptr = dict_ptr->getCreationException()) { const auto failed_dict_it = failed_dictionaries.find(name); @@ -292,6 +282,9 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const { if (!name.empty()) { + /// Если для словаря не удалось загрузить данные или даже не удалось инициализировать из конфига. + /// - всё-равно вставляем информацию в dictionaries, с нулевым указателем dict. + const std::lock_guard lock{dictionaries_mutex}; const auto exception_ptr = std::current_exception(); @@ -302,25 +295,7 @@ void ExternalDictionaries::reloadFromFile(const std::string & config_path, const dict_it->second.exception = exception_ptr; } - try - { - throw; - } - catch (const Poco::Exception & e) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually. " << e.displayText()); - } - catch (const std::exception & e) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually. " << e.what()); - } - catch (...) - { - LOG_ERROR(log, config_path << ": cannot create external dictionary '" << name - << "'! You must resolve this manually."); - } + tryLogCurrentException(log, "Cannot create external dictionary '" + name + "' from config path " + config_path); /// propagate exception if (throw_on_error) From 18ed0b2829c73d9e7a31639088bbe7ac5c40cedd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 12 Aug 2015 07:21:10 +0300 Subject: [PATCH 42/43] dbms: external dictionaries: fixed errors [#METR-17508]. --- dbms/include/DB/Dictionaries/CacheDictionary.h | 6 +++--- dbms/include/DB/Dictionaries/FlatDictionary.h | 2 +- dbms/include/DB/Dictionaries/HashedDictionary.h | 2 +- dbms/include/DB/Dictionaries/RangeHashedDictionary.h | 2 +- dbms/src/Interpreters/ExternalDictionaries.cpp | 2 +- libs/libmysqlxx/src/PoolWithFailover.cpp | 4 +--- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/dbms/include/DB/Dictionaries/CacheDictionary.h b/dbms/include/DB/Dictionaries/CacheDictionary.h index 07db7c0dda2..15bbe05cb19 100644 --- a/dbms/include/DB/Dictionaries/CacheDictionary.h +++ b/dbms/include/DB/Dictionaries/CacheDictionary.h @@ -620,9 +620,9 @@ private: mutable std::mt19937_64 rnd_engine{getSeed()}; mutable std::size_t bytes_allocated = 0; - mutable std::atomic element_count{}; - mutable std::atomic hit_count{}; - mutable std::atomic query_count{}; + mutable std::atomic element_count{0}; + mutable std::atomic hit_count{0}; + mutable std::atomic query_count{0}; const std::chrono::time_point creation_time = std::chrono::system_clock::now(); }; diff --git a/dbms/include/DB/Dictionaries/FlatDictionary.h b/dbms/include/DB/Dictionaries/FlatDictionary.h index b1267bc618f..0c1c2fb33e5 100644 --- a/dbms/include/DB/Dictionaries/FlatDictionary.h +++ b/dbms/include/DB/Dictionaries/FlatDictionary.h @@ -361,7 +361,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/include/DB/Dictionaries/HashedDictionary.h b/dbms/include/DB/Dictionaries/HashedDictionary.h index e356808fec5..a73f8869f81 100644 --- a/dbms/include/DB/Dictionaries/HashedDictionary.h +++ b/dbms/include/DB/Dictionaries/HashedDictionary.h @@ -347,7 +347,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count{}; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h index a00208c5b48..888173b8940 100644 --- a/dbms/include/DB/Dictionaries/RangeHashedDictionary.h +++ b/dbms/include/DB/Dictionaries/RangeHashedDictionary.h @@ -422,7 +422,7 @@ private: std::size_t bytes_allocated = 0; std::size_t element_count = 0; std::size_t bucket_count = 0; - mutable std::atomic query_count{}; + mutable std::atomic query_count{0}; std::chrono::time_point creation_time; diff --git a/dbms/src/Interpreters/ExternalDictionaries.cpp b/dbms/src/Interpreters/ExternalDictionaries.cpp index 4a84afa97d1..0e4525efb9a 100644 --- a/dbms/src/Interpreters/ExternalDictionaries.cpp +++ b/dbms/src/Interpreters/ExternalDictionaries.cpp @@ -161,7 +161,7 @@ void ExternalDictionaries::reloadImpl(const bool throw_on_error) { dictionary.second.exception = std::current_exception(); - tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version."); + tryLogCurrentException(log, "Cannot update external dictionary '" + name + "', leaving old version"); } } } diff --git a/libs/libmysqlxx/src/PoolWithFailover.cpp b/libs/libmysqlxx/src/PoolWithFailover.cpp index c98b42036d2..5fe4c64dfdf 100644 --- a/libs/libmysqlxx/src/PoolWithFailover.cpp +++ b/libs/libmysqlxx/src/PoolWithFailover.cpp @@ -13,10 +13,8 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & cfg cfg.keys(config_name, replica_keys); for (Poco::Util::AbstractConfiguration::Keys::const_iterator it = replica_keys.begin(); it != replica_keys.end(); ++it) { - if (!(*it == "port" || *it == "user" || *it == "password" || *it == "db" || *it == "table")) + if (*it == "replica") /// На том же уровне могут быть другие параметры. { - if (it->size() < std::string("replica").size() || it->substr(0, std::string("replica").size()) != "replica") - throw Poco::Exception("Unknown element in config: " + *it + ", expected replica"); std::string replica_name = config_name + "." + *it; Replica replica(new Pool(cfg, replica_name, default_connections, max_connections, config_name.c_str()), cfg.getInt(replica_name + ".priority", 0)); From 3ff1a857fe6019dcc99ff4373ad5e90de13ac173 Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Wed, 12 Aug 2015 19:26:53 +0300 Subject: [PATCH 43/43] dbms: Server: Use another hash function. [#METR-17276] --- .../AggregateFunctionUniq.h | 45 +++---- .../00212_aggregate_function_uniq.reference | 114 +++++++++--------- 2 files changed, 71 insertions(+), 88 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 8aa6edf22fb..3975f238818 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -98,7 +98,7 @@ struct AggregateFunctionUniqExactData template struct AggregateFunctionUniqCombinedData { - using Key = T; + using Key = UInt32; using Set = CombinedCardinalityEstimator >, 16, 14, 17, TrivialHash>; Set set; @@ -118,30 +118,6 @@ struct AggregateFunctionUniqCombinedData namespace detail { -/** Хэширование 64-битных целочисленных значений в 32-битные. - * Источник: https://gist.github.com/badboy/6267743 - */ -template -struct Hash64To32; - -template -struct Hash64To32::value || std::is_same::value>::type> -{ - static UInt32 compute(T key) - { - using U = typename std::make_unsigned::type; - auto x = static_cast(key); - - x = (~x) + (x << 18); - x = x ^ (x >> 31); - x = x * 21; - x = x ^ (x >> 11); - x = x + (x << 6); - x = x ^ (x >> 22); - return static_cast(x); - } -}; - /** Хэш-функция для uniqCombined. */ template @@ -154,26 +130,33 @@ struct CombinedCardinalityTraits }; template -struct CombinedCardinalityTraits::value || std::is_same::value>::type> +struct CombinedCardinalityTraits::value>::type> { - using Op = Hash64To32; + using U = typename std::make_unsigned::type; static UInt32 hash(T key) { - return Op::compute(key); + return intHash32<0>(static_cast(key)); + }; +}; + +template +struct CombinedCardinalityTraits::value>::type> +{ + static UInt32 hash(T key) + { + return intHash32<0>(key); }; }; template struct CombinedCardinalityTraits::value>::type> { - using Op = Hash64To32; - static UInt32 hash(T key) { UInt64 res = 0; memcpy(reinterpret_cast(&res), reinterpret_cast(&key), sizeof(key)); - return Op::compute(res); + return intHash32<0>(res); } }; diff --git a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference index 288258c7d81..d66effa9fb1 100644 --- a/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference +++ b/dbms/tests/queries/0_stateless/00212_aggregate_function_uniq.reference @@ -239,25 +239,25 @@ 31 162 35 162 36 162 -0 53988 -1 54083 -3 53994 -6 53948 -7 54209 -9 54112 -10 27000 -11 54058 -13 54158 -14 53926 -17 54094 -19 54127 -20 54065 -21 54207 -22 54056 -26 53982 -31 54156 -35 53960 -36 54076 +0 54226 +1 54034 +3 54016 +6 53982 +7 54076 +9 54218 +10 27075 +11 54093 +13 54108 +14 54096 +17 54294 +19 54070 +20 54028 +21 54170 +22 54106 +26 54103 +31 54050 +35 54130 +36 53868 0.125 1 0.5 1 0.05 1 @@ -291,25 +291,25 @@ 0.043 162 0.037 162 0.071 162 -0.045 54207 -0.125 54209 -0.5 54083 -0.05 54127 -0.143 53948 -0.091 27000 -0.056 54094 -0.048 54065 -0.083 54058 -0.25 53994 -1 53988 -0.1 54112 -0.028 53960 -0.027 54076 -0.031 54156 -0.067 53926 -0.043 54056 -0.037 53982 -0.071 54158 +0.045 54170 +0.125 54076 +0.5 54034 +0.05 54070 +0.143 53982 +0.091 27075 +0.056 54294 +0.048 54028 +0.083 54093 +0.25 54016 +1 54226 +0.1 54218 +0.028 54130 +0.027 53868 +0.031 54050 +0.067 54096 +0.043 54106 +0.037 54103 +0.071 54108 0.5 1 0.05 1 0.25 1 @@ -343,25 +343,25 @@ 0.037 162 0.1 163 1 162 -0.5 54083 -0.05 54127 -0.25 53994 -0.048 54065 -0.091 27000 -0.043 54056 -0.071 54158 -0.083 54058 -0.125 54209 -0.031 54156 -0.143 53948 -0.028 53960 -0.067 53926 -0.045 54207 -0.027 54076 -0.056 54094 -0.037 53982 -0.1 54112 -1 53988 +0.5 54034 +0.05 54070 +0.25 54016 +0.048 54028 +0.091 27075 +0.043 54106 +0.071 54108 +0.083 54093 +0.125 54076 +0.031 54050 +0.143 53982 +0.028 54130 +0.067 54096 +0.045 54170 +0.027 53868 +0.056 54294 +0.037 54103 +0.1 54218 +1 54226 1 1 3 1 6 1