dbms: Server: Performance improvements. [#METR-15210]

This commit is contained in:
Alexey Arno 2015-05-28 17:00:52 +03:00
parent 79bad96d98
commit 8125cad146
2 changed files with 110 additions and 108 deletions

View File

@ -295,13 +295,13 @@ namespace DB
struct FloatRoundingComputation<Float32, rounding_mode, PositiveScale> struct FloatRoundingComputation<Float32, rounding_mode, PositiveScale>
: public BaseFloatRoundingComputation<Float32> : public BaseFloatRoundingComputation<Float32>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
Float32 fscale = static_cast<Float32>(scale); Float32 fscale = static_cast<Float32>(scale);
mm_scale = _mm_load1_ps(&fscale); mm_scale = _mm_load1_ps(&fscale);
} }
static inline void compute(const Float32 * in, const Scale & scale, Float32 * out) static inline void compute(const Float32 * __restrict in, const Scale & scale, Float32 * __restrict out)
{ {
__m128 val = _mm_loadu_ps(in); __m128 val = _mm_loadu_ps(in);
val = _mm_mul_ps(val, scale); val = _mm_mul_ps(val, scale);
@ -315,13 +315,13 @@ namespace DB
struct FloatRoundingComputation<Float32, rounding_mode, NegativeScale> struct FloatRoundingComputation<Float32, rounding_mode, NegativeScale>
: public BaseFloatRoundingComputation<Float32> : public BaseFloatRoundingComputation<Float32>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
Float32 fscale = static_cast<Float32>(scale); Float32 fscale = static_cast<Float32>(scale);
mm_scale = _mm_load1_ps(&fscale); mm_scale = _mm_load1_ps(&fscale);
} }
static inline void compute(const Float32 * in, const Scale & scale, Float32 * out) static inline void compute(const Float32 * __restrict in, const Scale & scale, Float32 * __restrict out)
{ {
__m128 val = _mm_loadu_ps(in); __m128 val = _mm_loadu_ps(in);
@ -379,11 +379,11 @@ namespace DB
struct FloatRoundingComputation<Float32, rounding_mode, ZeroScale> struct FloatRoundingComputation<Float32, rounding_mode, ZeroScale>
: public BaseFloatRoundingComputation<Float32> : public BaseFloatRoundingComputation<Float32>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
} }
static inline void compute(const Float32 * in, const Scale & scale, Float32 * out) static inline void compute(const Float32 * __restrict in, const Scale & scale, Float32 * __restrict out)
{ {
__m128 val = _mm_loadu_ps(in); __m128 val = _mm_loadu_ps(in);
val = _mm_round_ps(val, rounding_mode); val = _mm_round_ps(val, rounding_mode);
@ -395,13 +395,13 @@ namespace DB
struct FloatRoundingComputation<Float64, rounding_mode, PositiveScale> struct FloatRoundingComputation<Float64, rounding_mode, PositiveScale>
: public BaseFloatRoundingComputation<Float64> : public BaseFloatRoundingComputation<Float64>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
Float64 fscale = static_cast<Float64>(scale); Float64 fscale = static_cast<Float64>(scale);
mm_scale = _mm_load1_pd(&fscale); mm_scale = _mm_load1_pd(&fscale);
} }
static inline void compute(const Float64 * in, const Scale & scale, Float64 * out) static inline void compute(const Float64 * __restrict in, const Scale & scale, Float64 * __restrict out)
{ {
__m128d val = _mm_loadu_pd(in); __m128d val = _mm_loadu_pd(in);
val = _mm_mul_pd(val, scale); val = _mm_mul_pd(val, scale);
@ -415,13 +415,13 @@ namespace DB
struct FloatRoundingComputation<Float64, rounding_mode, NegativeScale> struct FloatRoundingComputation<Float64, rounding_mode, NegativeScale>
: public BaseFloatRoundingComputation<Float64> : public BaseFloatRoundingComputation<Float64>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
Float64 fscale = static_cast<Float64>(scale); Float64 fscale = static_cast<Float64>(scale);
mm_scale = _mm_load1_pd(&fscale); mm_scale = _mm_load1_pd(&fscale);
} }
static inline void compute(const Float64 * in, const Scale & scale, Float64 * out) static inline void compute(const Float64 * __restrict in, const Scale & scale, Float64 * __restrict out)
{ {
__m128d val = _mm_loadu_pd(in); __m128d val = _mm_loadu_pd(in);
@ -479,11 +479,11 @@ namespace DB
struct FloatRoundingComputation<Float64, rounding_mode, ZeroScale> struct FloatRoundingComputation<Float64, rounding_mode, ZeroScale>
: public BaseFloatRoundingComputation<Float64> : public BaseFloatRoundingComputation<Float64>
{ {
static inline void prepareScale(size_t scale, Scale & mm_scale) static inline void prepare(size_t scale, Scale & mm_scale)
{ {
} }
static inline void compute(const Float64 * in, const Scale & scale, Float64 * out) static inline void compute(const Float64 * __restrict in, const Scale & scale, Float64 * __restrict out)
{ {
__m128d val = _mm_loadu_pd(in); __m128d val = _mm_loadu_pd(in);
val = _mm_round_pd(val, rounding_mode); val = _mm_round_pd(val, rounding_mode);
@ -512,11 +512,11 @@ namespace DB
{ {
auto divisor = Op::prepare(scale); auto divisor = Op::prepare(scale);
auto begin_in = &in[0]; const T* begin_in = &in[0];
auto end_in = begin_in + in.size(); const T* end_in = begin_in + in.size();
auto p_out = &out[0];
for (auto p_in = begin_in; p_in != end_in; ++p_in) T* __restrict p_out = &out[0];
for (const T* __restrict p_in = begin_in; p_in != end_in; ++p_in)
{ {
*p_out = Op::compute(*p_in, divisor); *p_out = Op::compute(*p_in, divisor);
++p_out; ++p_out;
@ -545,43 +545,45 @@ namespace DB
static inline void apply(const PODArray<T> & in, size_t scale, typename ColumnVector<T>::Container_t & out) static inline void apply(const PODArray<T> & in, size_t scale, typename ColumnVector<T>::Container_t & out)
{ {
Scale mm_scale; Scale mm_scale;
Op::prepareScale(scale, mm_scale); Op::prepare(scale, mm_scale);
const size_t data_count = std::tuple_size<Data>(); const size_t data_count = std::tuple_size<Data>();
auto begin_in = &in[0]; const T* begin_in = &in[0];
auto end_in = begin_in + in.size(); const T* end_in = begin_in + in.size();
auto limit = end_in - (data_count - 1);
auto begin_out = &out[0]; T* begin_out = &out[0];
auto end_out = begin_out + out.size(); const T* end_out = begin_out + out.size();
auto p_in = begin_in; const T* limit = end_in - (data_count - 1);
auto p_out = begin_out;
const T* __restrict p_in = begin_in;
T* __restrict p_out = begin_out;
for (; p_in < limit; p_in += data_count) for (; p_in < limit; p_in += data_count)
{ {
Op::compute(reinterpret_cast<const T *>(p_in), mm_scale, reinterpret_cast<T *>(p_out)); Op::compute(p_in, mm_scale, p_out);
p_out += data_count; p_out += data_count;
} }
if (p_in < end_in) if (p_in < end_in)
{ {
Data tmp{0}; Data tmp{0};
auto begin_tmp = &tmp[0]; T* begin_tmp = &tmp[0];
auto end_tmp = begin_tmp + data_count; const T* end_tmp = begin_tmp + data_count;
for (auto p_tmp = begin_tmp; (p_tmp != end_tmp) && (p_in != end_in); ++p_tmp) for (T* __restrict p_tmp = begin_tmp; (p_tmp != end_tmp) && (p_in != end_in); ++p_tmp)
{ {
*p_tmp = *p_in; *p_tmp = *p_in;
++p_in; ++p_in;
} }
Data res; Data res;
const T* begin_res = &res[0];
const T* end_res = begin_res + data_count;
Op::compute(reinterpret_cast<T *>(&tmp), mm_scale, reinterpret_cast<T *>(&res)); Op::compute(reinterpret_cast<T *>(&tmp), mm_scale, reinterpret_cast<T *>(&res));
auto begin_res = &res[0]; for (const T* __restrict p_res = begin_res; (p_res != end_res) && (p_out != end_out); ++p_res)
auto end_res = begin_res + data_count;
for (auto p_res = begin_res; (p_res != end_res) && (p_out != end_out); ++p_res)
{ {
*p_out = *p_res; *p_out = *p_res;
++p_out; ++p_out;
@ -596,7 +598,7 @@ namespace DB
else else
{ {
Scale mm_scale; Scale mm_scale;
Op::prepareScale(scale, mm_scale); Op::prepare(scale, mm_scale);
Data tmp{0}; Data tmp{0};
tmp[0] = val; tmp[0] = val;

View File

@ -458,16 +458,16 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-13 -6 -16 -13 -20 -10
-13 -6 -16 -13 -20 -10
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
0 0 0 0 0 0
0 0 0 0 0 0
-13 -13 -13 -13 -13 -13
@ -499,18 +499,18 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-13 -6 -16 -13 -20 -10
-13 -6 -16 -13 -20 -10
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
@ -540,18 +540,18 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-13 -6 -16 -13 -20 -10
-13 -6 -16 -13 -20 -10
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
@ -581,18 +581,18 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-16 -6 -16 -10 -20 -10
-13 -6 -16 -13 -20 -10
-13 -6 -16 -13 -20 -10
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-16 84 -16 0 -100 0
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
@ -622,18 +622,18 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
-13 0 0 -13 -20 -10
-13 0 0 -13 -20 -10
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
@ -663,18 +663,18 @@
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
-13 -13 -13 -13 -13 -13
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
0 0 0 -10 -20 -10
-13 0 0 -13 -20 -10
-13 0 0 -13 -20 -10
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
0 0 0 0 -100 0
2.72 2.72 2.71 2.72 2.72 2.71
2.72 2.72 2.71 2.72 2.72 2.71
2.72 2.72 2.71 2.72 2.72 2.71
@ -745,12 +745,12 @@
-3 -2 -3 -3 -2 -3
-3 -2 -3 -3 -2 -3
-3 -2 -3 -3 -2 -3
0 0 0 0 -10 0
0 0 0 0 -10 0
0 0 0 0 -10 0
0 0 0 0 -10 0
-2.7 0 0 -2.7 -10 0
-2.7 0 0 -2.7 -10 0
0 0 0 0 0 0
0 0 0 0 0 0
0 0 0 0 0 0