From 0931ac57ae5e86dfb2a104d1a3dcc8f0fc6098fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2013 14:16:16 +0000 Subject: [PATCH] dbms: improved performance [#CONV-2944]. --- .../AggregateFunctions/AggregateFunctionAvg.h | 55 ++-- .../AggregateFunctionQuantile.h | 4 +- .../AggregateFunctions/AggregateFunctionSum.h | 52 +--- .../AggregateFunctionUniq.h | 47 ++- .../AggregateFunctionFactory.cpp | 286 +++++++++--------- dbms/src/Interpreters/Aggregator.cpp | 10 + 6 files changed, 227 insertions(+), 227 deletions(-) diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h index 71d3a2ea550..9bddf7bc8a9 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionAvg.h @@ -12,18 +12,19 @@ namespace DB { +template struct AggregateFunctionAvgData { - Float64 sum; + T sum; UInt64 count; AggregateFunctionAvgData() : sum(0), count(0) {} }; -/// Считает арифметическое среднее значение чисел. Параметром шаблона может быть UInt64, Int64 или Float64. +/// Считает арифметическое среднее значение чисел. template -class AggregateFunctionAvg : public IUnaryAggregateFunction +class AggregateFunctionAvg : public IUnaryAggregateFunction::Type> > { public: String getName() const { return "avg"; } @@ -44,35 +45,35 @@ public: void addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const { - data(place).sum += get(column[row_num]); - ++data(place).count; + this->data(place).sum += static_cast &>(column).getData()[row_num]; + ++this->data(place).count; } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const { - data(place).sum += data(rhs).sum; - data(place).count += data(rhs).count; + this->data(place).sum += this->data(rhs).sum; + this->data(place).count += this->data(rhs).count; } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const { - writeFloatBinary(data(place).sum, buf); - writeVarUInt(data(place).count, buf); + writeBinary(this->data(place).sum, buf); + writeVarUInt(this->data(place).count, buf); } void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const { - Float64 tmp_sum = 0; + typename NearestFieldType::Type tmp_sum = 0; UInt64 tmp_count = 0; - readFloatBinary(tmp_sum, buf); + readBinary(tmp_sum, buf); readVarUInt(tmp_count, buf); - data(place).sum += tmp_sum; - data(place).count += tmp_count; + this->data(place).sum += tmp_sum; + this->data(place).count += tmp_count; } Field getResult(ConstAggregateDataPtr place) const { - return data(place).sum / data(place).count; + return static_cast(this->data(place).sum) / this->data(place).count; } }; @@ -82,7 +83,7 @@ public: * avgIf(x, cond) эквивалентно sum(cond ? x : 0) / sum(cond). */ template -class AggregateFunctionAvgIf : public IAggregateFunctionHelper +class AggregateFunctionAvgIf : public IAggregateFunctionHelper::Type> > { public: String getName() const { return "avgIf"; } @@ -106,38 +107,38 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const { - if (columns[1]->getDataAt(row_num).data[0]) + if (static_cast(*columns[1]).getData()[row_num]) { - data(place).sum += get((*columns[0])[row_num]); - ++data(place).count; + this->data(place).sum += static_cast &>(*columns[0]).getData()[row_num]; + ++this->data(place).count; } } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const { - data(place).sum += data(rhs).sum; - data(place).count += data(rhs).count; + this->data(place).sum += this->data(rhs).sum; + this->data(place).count += this->data(rhs).count; } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const { - writeFloatBinary(data(place).sum, buf); - writeVarUInt(data(place).count, buf); + writeBinary(this->data(place).sum, buf); + writeVarUInt(this->data(place).count, buf); } void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const { - Float64 tmp_sum = 0; + typename NearestFieldType::Type tmp_sum = 0; UInt64 tmp_count = 0; - readFloatBinary(tmp_sum, buf); + readBinary(tmp_sum, buf); readVarUInt(tmp_count, buf); - data(place).sum += tmp_sum; - data(place).count += tmp_count; + this->data(place).sum += tmp_sum; + this->data(place).count += tmp_count; } Field getResult(ConstAggregateDataPtr place) const { - return data(place).sum / data(place).count; + return static_cast(this->data(place).sum) / this->data(place).count; } }; diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h index 9106a305dab..e7c11162ec7 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionQuantile.h @@ -66,9 +66,7 @@ public: void addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const { - this->data(place).sample.insert( - static_cast::Type>( - *reinterpret_cast(column.getDataAt(row_num).data))); + this->data(place).sample.insert(static_cast &>(column).getData()[row_num]); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h index 62c23e729c6..0b3bc2fdaa7 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionSum.h @@ -11,30 +11,6 @@ namespace DB { -template struct AggregateFunctionSumTraits; - -template <> struct AggregateFunctionSumTraits -{ - static DataTypePtr getReturnType() { return new DataTypeUInt64; } - static void write(UInt64 x, WriteBuffer & buf) { writeVarUInt(x, buf); } - static void read(UInt64 & x, ReadBuffer & buf) { readVarUInt(x, buf); } -}; - -template <> struct AggregateFunctionSumTraits -{ - static DataTypePtr getReturnType() { return new DataTypeInt64; } - static void write(Int64 x, WriteBuffer & buf) { writeVarInt(x, buf); } - static void read(Int64 & x, ReadBuffer & buf) { readVarInt(x, buf); } -}; - -template <> struct AggregateFunctionSumTraits -{ - static DataTypePtr getReturnType() { return new DataTypeFloat64; } - static void write(Float64 x, WriteBuffer & buf) { writeFloatBinary(x, buf); } - static void read(Float64 & x, ReadBuffer & buf) { readFloatBinary(x, buf); } -}; - - template struct AggregateFunctionSumData { @@ -44,9 +20,9 @@ struct AggregateFunctionSumData }; -/// Считает сумму чисел. Параметром шаблона может быть UInt64, Int64 или Float64. +/// Считает сумму чисел. template -class AggregateFunctionSum : public IUnaryAggregateFunction > +class AggregateFunctionSum : public IUnaryAggregateFunction::Type> > { public: String getName() const { return "sum"; } @@ -54,7 +30,7 @@ public: DataTypePtr getReturnType() const { - return AggregateFunctionSumTraits::getReturnType(); + return new typename DataTypeFromFieldType::Type>::Type; } void setArgument(const DataTypePtr & argument) @@ -67,7 +43,7 @@ public: void addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const { - this->data(place).sum += get(column[row_num]); + this->data(place).sum += static_cast &>(column).getData()[row_num]; } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const @@ -77,13 +53,13 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const { - AggregateFunctionSumTraits::write(this->data(place).sum, buf); + writeBinary(this->data(place).sum, buf); } void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const { - T tmp; - AggregateFunctionSumTraits::read(tmp, buf); + typename NearestFieldType::Type tmp; + readBinary(tmp, buf); this->data(place).sum += tmp; } @@ -96,7 +72,7 @@ public: /// Считает сумму чисел при выполнении условия. sumIf(x, cond) эквивалентно sum(cond ? x : 0). template -class AggregateFunctionSumIf : public IAggregateFunctionHelper > +class AggregateFunctionSumIf : public IAggregateFunctionHelper::Type> > { public: String getName() const { return "sumIf"; } @@ -104,7 +80,7 @@ public: DataTypePtr getReturnType() const { - return AggregateFunctionSumTraits::getReturnType(); + return new typename DataTypeFromFieldType::Type>::Type; } void setArguments(const DataTypes & arguments) @@ -120,8 +96,8 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const { - if (columns[1]->getDataAt(row_num).data[0]) - this->data(place).sum += get((*columns[0])[row_num]); + if (static_cast(*columns[1]).getData()[row_num]) + this->data(place).sum += static_cast &>(*columns[0]).getData()[row_num]; } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const @@ -131,13 +107,13 @@ public: void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const { - AggregateFunctionSumTraits::write(this->data(place).sum, buf); + writeBinary(this->data(place).sum, buf); } void deserializeMerge(AggregateDataPtr place, ReadBuffer & buf) const { - T tmp; - AggregateFunctionSumTraits::read(tmp, buf); + typename NearestFieldType::Type tmp; + readBinary(tmp, buf); this->data(place).sum += tmp; } diff --git a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h index 2e0e94af270..f26681d3ec5 100644 --- a/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h +++ b/dbms/include/DB/AggregateFunctions/AggregateFunctionUniq.h @@ -18,16 +18,19 @@ namespace DB { -template struct AggregateFunctionUniqTraits; - -template <> struct AggregateFunctionUniqTraits +template struct AggregateFunctionUniqTraits { - static UInt64 hash(UInt64 x) { return x; } + static UInt64 hash(T x) { return x; } }; -template <> struct AggregateFunctionUniqTraits +template <> struct AggregateFunctionUniqTraits { - static UInt64 hash(Int64 x) { return x; } + static UInt64 hash(Float32 x) + { + UInt64 res = 0; + memcpy(reinterpret_cast(&res), reinterpret_cast(&x), sizeof(x)); + return res; + } }; template <> struct AggregateFunctionUniqTraits @@ -40,12 +43,6 @@ template <> struct AggregateFunctionUniqTraits } }; -template <> struct AggregateFunctionUniqTraits -{ - /// Имейте ввиду, что вычисление приближённое. - static UInt64 hash(const String & x) { return CityHash64(x.data(), x.size()); } -}; - struct AggregateFunctionUniqData { @@ -72,10 +69,9 @@ public: { } - void addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const { - data(place).set.insert(AggregateFunctionUniqTraits::hash(get(column[row_num]))); + data(place).set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(column).getData()[row_num])); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const @@ -101,6 +97,14 @@ public: } }; +template <> +inline void AggregateFunctionUniq::addOne(AggregateDataPtr place, const IColumn & column, size_t row_num) const +{ + /// Имейте ввиду, что вычисление приближённое. + StringRef value = column.getDataAt(row_num); + data(place).set.insert(CityHash64(value.data, value.size)); +} + /** То же самое, но выводит состояние вычислений в строке в текстовом виде. * Используется, если какой-то внешней программе (сейчас это ███████████) @@ -154,8 +158,8 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const { - if (columns[1]->getDataAt(row_num).data[0]) - data(place).set.insert(AggregateFunctionUniqTraits::hash(get((*columns[0])[row_num]))); + if (static_cast(*columns[1]).getData()[row_num]) + data(place).set.insert(AggregateFunctionUniqTraits::hash(static_cast &>(*columns[0]).getData()[row_num])); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs) const @@ -181,4 +185,15 @@ public: } }; +template <> +inline void AggregateFunctionUniqIf::add(AggregateDataPtr place, const IColumn ** columns, size_t row_num) const +{ + if (static_cast(*columns[1]).getData()[row_num]) + { + /// Имейте ввиду, что вычисление приближённое. + StringRef value = columns[0]->getDataAt(row_num); + data(place).set.insert(CityHash64(value.data, value.size)); + } +} + } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp index 7b0a6ad8ff9..c52187aed90 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionFactory.cpp @@ -15,6 +15,8 @@ #include #include +#include +#include namespace DB @@ -26,6 +28,45 @@ AggregateFunctionFactory::AggregateFunctionFactory() } +/** Создать агрегатную функцию с числовым типом в параметре шаблона, в зависимости от типа аргумента. + */ +template