From 91cb53f5849bfaa4bf04fff43b3beec3ee9e17c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 1 Mar 2015 03:14:55 +0300 Subject: [PATCH 01/32] dbms: whitespaces [#METR-2944]. --- dbms/src/Interpreters/Set.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 8577024c619..652a26da2a8 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -129,7 +129,7 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set) UInt64 key = column.get64(i); res.insert(key); - if(create_ordered_set) + if (create_ordered_set) ordered_set_elements->push_back(column[i]); } } @@ -156,7 +156,7 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set) if (inserted) it->data = string_pool.insert(ref.data, ref.size); - if(create_ordered_set) + if (create_ordered_set) ordered_set_elements->push_back(std::string(ref.data, ref.size)); } } @@ -178,7 +178,7 @@ bool Set::insertFromBlock(const Block & block, bool create_ordered_set) if (inserted) it->data = string_pool.insert(ref.data, ref.size); - if(create_ordered_set) + if (create_ordered_set) ordered_set_elements->push_back(std::string(ref.data, ref.size)); } } From eea281236d10dcee14a6414140fd1f878e72453a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 1 Mar 2015 04:06:49 +0300 Subject: [PATCH 02/32] dbms: improved performance of aggregate functions argMin, argMax (about 5 times) [#METR-2944]. --- .../AggregateFunctionsArgMinMax.h | 118 +++----------- .../AggregateFunctionsMinMaxAny.h | 148 +++++++++++++++--- .../AggregateFunctionFactory.cpp | 78 ++++++++- 3 files changed, 218 insertions(+), 126 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h index 67265e90ebc..edf5e8e0e1d 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsArgMinMax.h @@ -3,41 +3,31 @@ #include #include -#include +#include namespace DB { -struct AggregateFunctionArgMinTraits -{ - static bool better(const Field & lhs, const Field & rhs) { return lhs < rhs; } - static String name() { return "argMin"; } -}; - -struct AggregateFunctionArgMaxTraits -{ - static bool better(const Field & lhs, const Field & rhs) { return lhs > rhs; } - static String name() { return "argMax"; } -}; - +/// Возможные значения параметров шаблонов см. в AggregateFunctionsMinMaxAny.h +template struct AggregateFunctionsArgMinMaxData { - Field result; // аргумент, при котором достигается минимальное/максимальное значение value. - Field value; // значение, для которого считается минимум/максимум. + ResultData result; // аргумент, при котором достигается минимальное/максимальное значение value. + ValueData value; // значение, для которого считается минимум/максимум. }; /// Возвращает первое попавшееся значение arg для минимального/максимального value. Пример: argMax(arg, value). -template -class AggregateFunctionsArgMinMax final : public IAggregateFunctionHelper +template +class AggregateFunctionsArgMinMax final : public IAggregateFunctionHelper { private: DataTypePtr type_res; DataTypePtr type_val; public: - String getName() const { return Traits::name(); } + String getName() const { return String("arg") + decltype(Data::value)::name(); } DataTypePtr getReturnType() const { @@ -55,105 +45,37 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const { - Field result; - Field value; - columns[0]->get(row_num, result); - columns[1]->get(row_num, value); - Data & d = data(place); - - if (!d.value.isNull()) - { - if (Traits::better(value, d.value)) - { - d.result = result; - d.value = value; - } - } - else - { - d.result = result; - d.value = value; - } + if (this->data(place).value.changeIfBetter(*columns[1], row_num)) + this->data(place).result.change(*columns[0], row_num); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const { - Data & d = data(place); - const Data & d_rhs = data(rhs); - - if (!d.value.isNull()) - { - if (Traits::better(d_rhs.value, d.value)) - { - d.result = d_rhs.result; - d.value = d_rhs.value; - } - } - else - { - d.result = d_rhs.result; - d.value = d_rhs.value; - } + if (this->data(place).value.changeIfBetter(this->data(rhs).value)) + this->data(place).result.change(this->data(rhs).result); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const { - const Data & d = data(place); - - if (unlikely(d.result.isNull())) - { - writeBinary(false, buf); - } - else - { - writeBinary(true, buf); - type_res->serializeBinary(d.result, buf); - type_val->serializeBinary(d.value, buf); - } + this->data(place).result.write(buf, *type_res.get()); + this->data(place).value.write(buf, *type_val.get()); } void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const { - Data & d = data(place); + Data rhs; /// Для строчек не очень оптимально, так как может делаться одна лишняя аллокация. - bool is_not_null = false; - readBinary(is_not_null, buf); + rhs.result.read(buf, *type_res.get()); + rhs.value.read(buf, *type_val.get()); - if (is_not_null) - { - if (!d.value.isNull()) - { - Field result_; - Field value_; - - type_res->deserializeBinary(result_, buf); - type_val->deserializeBinary(value_, buf); - - if (Traits::better(value_, d.value)) - { - d.result = result_; - d.value = value_; - } - } - else - { - type_res->deserializeBinary(d.result, buf); - type_val->deserializeBinary(d.value, buf); - } - } + if (this->data(place).value.changeIfBetter(rhs.value)) + this->data(place).result.change(rhs.result); } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const { - if (unlikely(data(place).value.isNull())) - to.insertDefault(); - else - to.insert(data(place).result); + this->data(place).result.insertResultInto(to); } }; -typedef AggregateFunctionsArgMinMax AggregateFunctionArgMin; -typedef AggregateFunctionsArgMinMax AggregateFunctionArgMax; - - } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h index 9f7311fce5a..96bdd1791a8 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionsMinMaxAny.h @@ -67,40 +67,70 @@ struct SingleValueDataFixed value = to.value; } - void changeFirstTime(const IColumn & column, size_t row_num) + bool changeFirstTime(const IColumn & column, size_t row_num) { if (!has()) + { change(column, row_num); + return true; + } + else + return false; } - void changeFirstTime(const Self & to) + bool changeFirstTime(const Self & to) { if (!has()) + { change(to); + return true; + } + else + return false; } - void changeIfLess(const IColumn & column, size_t row_num) + bool changeIfLess(const IColumn & column, size_t row_num) { if (!has() || static_cast &>(column).getData()[row_num] < value) + { change(column, row_num); + return true; + } + else + return false; } - void changeIfLess(const Self & to) + bool changeIfLess(const Self & to) { if (to.has() && (!has() || to.value < value)) + { change(to); + return true; + } + else + return false; } - void changeIfGreater(const IColumn & column, size_t row_num) + bool changeIfGreater(const IColumn & column, size_t row_num) { if (!has() || static_cast &>(column).getData()[row_num] > value) + { change(column, row_num); + return true; + } + else + return false; } - void changeIfGreater(const Self & to) + bool changeIfGreater(const Self & to) { if (to.has() && (!has() || to.value > value)) + { change(to); + return true; + } + else + return false; } }; @@ -238,40 +268,70 @@ struct __attribute__((__packed__)) SingleValueDataString changeImpl(to.getStringRef()); } - void changeFirstTime(const IColumn & column, size_t row_num) + bool changeFirstTime(const IColumn & column, size_t row_num) { if (!has()) + { change(column, row_num); + return true; + } + else + return false; } - void changeFirstTime(const Self & to) + bool changeFirstTime(const Self & to) { if (!has()) + { change(to); + return true; + } + else + return false; } - void changeIfLess(const IColumn & column, size_t row_num) + bool changeIfLess(const IColumn & column, size_t row_num) { if (!has() || static_cast(column).getDataAtWithTerminatingZero(row_num) < getStringRef()) + { change(column, row_num); + return true; + } + else + return false; } - void changeIfLess(const Self & to) + bool changeIfLess(const Self & to) { if (to.has() && (!has() || to.getStringRef() < getStringRef())) + { change(to); + return true; + } + else + return false; } - void changeIfGreater(const IColumn & column, size_t row_num) + bool changeIfGreater(const IColumn & column, size_t row_num) { if (!has() || static_cast(column).getDataAtWithTerminatingZero(row_num) > getStringRef()) + { change(column, row_num); + return true; + } + else + return false; } - void changeIfGreater(const Self & to) + bool changeIfGreater(const Self & to) { if (to.has() && (!has() || to.getStringRef() > getStringRef())) + { change(to); + return true; + } + else + return false; } }; @@ -326,54 +386,90 @@ struct SingleValueDataGeneric value = to.value; } - void changeFirstTime(const IColumn & column, size_t row_num) + bool changeFirstTime(const IColumn & column, size_t row_num) { if (!has()) + { change(column, row_num); + return true; + } + else + return false; } - void changeFirstTime(const Self & to) + bool changeFirstTime(const Self & to) { if (!has()) + { change(to); + return true; + } + else + return false; } - void changeIfLess(const IColumn & column, size_t row_num) + bool changeIfLess(const IColumn & column, size_t row_num) { if (!has()) + { change(column, row_num); + return true; + } else { Field new_value; column.get(row_num, new_value); if (new_value < value) + { value = new_value; + return true; + } + else + return false; } } - void changeIfLess(const Self & to) + bool changeIfLess(const Self & to) { if (to.has() && (!has() || to.value < value)) + { change(to); + return true; + } + else + return false; } - void changeIfGreater(const IColumn & column, size_t row_num) + bool changeIfGreater(const IColumn & column, size_t row_num) { if (!has()) + { change(column, row_num); + return true; + } else { Field new_value; column.get(row_num, new_value); if (new_value > value) + { value = new_value; + return true; + } + else + return false; } } - void changeIfGreater(const Self & to) + bool changeIfGreater(const Self & to) { if (to.has() && (!has() || to.value > value)) + { change(to); + return true; + } + else + return false; } }; @@ -388,8 +484,8 @@ struct AggregateFunctionMinData : Data { typedef AggregateFunctionMinData Self; - void changeIfBetter(const IColumn & column, size_t row_num) { this->changeIfLess(column, row_num); } - void changeIfBetter(const Self & to) { this->changeIfLess(to); } + bool changeIfBetter(const IColumn & column, size_t row_num) { return this->changeIfLess(column, row_num); } + bool changeIfBetter(const Self & to) { return this->changeIfLess(to); } static const char * name() { return "min"; } }; @@ -399,8 +495,8 @@ struct AggregateFunctionMaxData : Data { typedef AggregateFunctionMaxData Self; - void changeIfBetter(const IColumn & column, size_t row_num) { this->changeIfGreater(column, row_num); } - void changeIfBetter(const Self & to) { this->changeIfGreater(to); } + bool changeIfBetter(const IColumn & column, size_t row_num) { return this->changeIfGreater(column, row_num); } + bool changeIfBetter(const Self & to) { return this->changeIfGreater(to); } static const char * name() { return "max"; } }; @@ -410,8 +506,8 @@ struct AggregateFunctionAnyData : Data { typedef AggregateFunctionAnyData Self; - void changeIfBetter(const IColumn & column, size_t row_num) { this->changeFirstTime(column, row_num); } - void changeIfBetter(const Self & to) { this->changeFirstTime(to); } + bool changeIfBetter(const IColumn & column, size_t row_num) { return this->changeFirstTime(column, row_num); } + bool changeIfBetter(const Self & to) { return this->changeFirstTime(to); } static const char * name() { return "any"; } }; @@ -421,8 +517,8 @@ struct AggregateFunctionAnyLastData : Data { typedef AggregateFunctionAnyLastData Self; - void changeIfBetter(const IColumn & column, size_t row_num) { this->change(column, row_num); } - void changeIfBetter(const Self & to) { this->change(to); } + bool changeIfBetter(const IColumn & column, size_t row_num) { this->change(column, row_num); return true; } + bool changeIfBetter(const Self & to) { this->change(to); return true; } static const char * name() { return "anyLast"; } }; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 1ce1c1f2083..153d6600581 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -154,6 +154,80 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na } +/// argMin, argMax +template