dbms: porting to aarch64 [#METR-19609].

This commit is contained in:
Alexey Milovidov 2016-01-14 00:05:11 +03:00
parent fefce00f5d
commit e513e9808b
10 changed files with 107 additions and 31 deletions

View File

@ -10,6 +10,10 @@
#include <DB/Columns/IColumn.h> #include <DB/Columns/IColumn.h>
#if defined(__x86_64__)
#include <emmintrin.h>
#endif
namespace DB namespace DB
{ {
@ -288,17 +292,20 @@ public:
if (result_size_hint) if (result_size_hint)
res_data.reserve(result_size_hint > 0 ? result_size_hint : size); res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
/** Чуть более оптимизированная версия.
* Исходит из допущения, что часто куски последовательно идущих значений
* полностью проходят или полностью не проходят фильтр.
* Поэтому, будем оптимистично проверять куски по 16 значений.
*/
const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_pos = &filt[0];
const UInt8 * filt_end = filt_pos + size; const UInt8 * filt_end = filt_pos + size;
const UInt8 * filt_end_sse = filt_pos + size / 16 * 16;
const T * data_pos = &data[0]; const T * data_pos = &data[0];
#if defined(__x86_64__)
/** Чуть более оптимизированная версия.
* Исходит из допущения, что часто куски последовательно идущих значений
* полностью проходят или полностью не проходят фильтр.
* Поэтому, будем оптимистично проверять куски по SIMD_BYTES значений.
*/
static constexpr size_t SIMD_BYTES = 16;
const __m128i zero16 = _mm_setzero_si128(); const __m128i zero16 = _mm_setzero_si128();
const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_sse) while (filt_pos < filt_end_sse)
{ {
@ -310,18 +317,19 @@ public:
} }
else if (0xFFFF == mask) else if (0xFFFF == mask)
{ {
res_data.insert(data_pos, data_pos + 16); res_data.insert(data_pos, data_pos + SIMD_BYTES);
} }
else else
{ {
for (size_t i = 0; i < 16; ++i) for (size_t i = 0; i < SIMD_BYTES; ++i)
if (filt_pos[i]) if (filt_pos[i])
res_data.push_back(data_pos[i]); res_data.push_back(data_pos[i]);
} }
filt_pos += 16; filt_pos += SIMD_BYTES;
data_pos += 16; data_pos += SIMD_BYTES;
} }
#endif
while (filt_pos < filt_end) while (filt_pos < filt_end)
{ {

View File

@ -913,7 +913,10 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameBitNot>
/// Оптимизации для целочисленного деления на константу. /// Оптимизации для целочисленного деления на константу.
#define LIBDIVIDE_USE_SSE2 1 #if defined(__x86_64__)
#define LIBDIVIDE_USE_SSE2 1
#endif
#include <libdivide.h> #include <libdivide.h>
@ -947,6 +950,8 @@ struct DivideIntegralByConstantImpl
const A * a_pos = &a[0]; const A * a_pos = &a[0];
const A * a_end = a_pos + size; const A * a_end = a_pos + size;
ResultType * c_pos = &c[0]; ResultType * c_pos = &c[0];
#if defined(__x86_64__)
static constexpr size_t values_per_sse_register = 16 / sizeof(A); static constexpr size_t values_per_sse_register = 16 / sizeof(A);
const A * a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register; const A * a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register;
@ -958,6 +963,7 @@ struct DivideIntegralByConstantImpl
a_pos += values_per_sse_register; a_pos += values_per_sse_register;
c_pos += values_per_sse_register; c_pos += values_per_sse_register;
} }
#endif
while (a_pos < a_end) while (a_pos < a_end)
{ {

View File

@ -17,8 +17,10 @@
#include <DB/Functions/IFunction.h> #include <DB/Functions/IFunction.h>
#include <ext/range.hpp> #include <ext/range.hpp>
#include <emmintrin.h> #if defined(__x86_64__)
#include <nmmintrin.h> #include <emmintrin.h>
#include <nmmintrin.h>
#endif
namespace DB namespace DB
@ -233,11 +235,12 @@ struct LowerUpperImpl
private: private:
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
{ {
const auto flip_case_mask = 'A' ^ 'a';
#if defined(__x86_64__)
const auto bytes_sse = sizeof(__m128i); const auto bytes_sse = sizeof(__m128i);
const auto src_end_sse = src_end - (src_end - src) % bytes_sse; const auto src_end_sse = src_end - (src_end - src) % bytes_sse;
const auto flip_case_mask = 'A' ^ 'a';
const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1);
const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1);
const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask);
@ -260,6 +263,7 @@ private:
/// store result back to destination /// store result back to destination
_mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars);
} }
#endif
for (; src < src_end; ++src, ++dst) for (; src < src_end; ++src, ++dst)
if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) if (*src >= not_case_lower_bound && *src <= not_case_upper_bound)
@ -394,6 +398,7 @@ private:
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
{ {
#if defined(__x86_64__)
const auto bytes_sse = sizeof(__m128i); const auto bytes_sse = sizeof(__m128i);
auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse;
@ -455,7 +460,7 @@ private:
} }
} }
} }
#endif
/// handle remaining symbols /// handle remaining symbols
while (src < src_end) while (src < src_end)
toCase(src, src_end, dst); toCase(src, src_end, dst);

View File

@ -1,4 +1,6 @@
#include <emmintrin.h> #if defined(__x86_64__)
#include <emmintrin.h>
#endif
#include <DB/Columns/IColumn.h> #include <DB/Columns/IColumn.h>
@ -15,10 +17,11 @@ size_t countBytesInFilter(const IColumn::Filter & filt)
* Лучше было бы использовать != 0, то это не позволяет SSE2. * Лучше было бы использовать != 0, то это не позволяет SSE2.
*/ */
const __m128i zero16 = _mm_setzero_si128();
const Int8 * pos = reinterpret_cast<const Int8 *>(&filt[0]); const Int8 * pos = reinterpret_cast<const Int8 *>(&filt[0]);
const Int8 * end = pos + filt.size(); const Int8 * end = pos + filt.size();
#if defined(__x86_64__)
const __m128i zero16 = _mm_setzero_si128();
const Int8 * end64 = pos + filt.size() / 64 * 64; const Int8 * end64 = pos + filt.size() / 64 * 64;
for (; pos < end64; pos += 64) for (; pos < end64; pos += 64)
@ -35,6 +38,7 @@ size_t countBytesInFilter(const IColumn::Filter & filt)
| (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8( | (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)), _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos + 48)),
zero16))) << 48)); zero16))) << 48));
#endif
for (; pos < end; ++pos) for (; pos < end; ++pos)
count += *pos > 0; count += *pos > 0;
@ -71,17 +75,12 @@ void filterArraysImpl(
IColumn::Offset_t current_src_offset = 0; IColumn::Offset_t current_src_offset = 0;
static constexpr size_t SIMD_BYTES = 16;
const UInt8 * filt_pos = &filt[0]; const UInt8 * filt_pos = &filt[0];
const auto filt_end = filt_pos + size; const auto filt_end = filt_pos + size;
const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
auto offsets_pos = &src_offsets[0]; auto offsets_pos = &src_offsets[0];
const auto offsets_begin = offsets_pos; const auto offsets_begin = offsets_pos;
const __m128i zero_vec = _mm_setzero_si128();
/// copy array ending at *end_offset_ptr /// copy array ending at *end_offset_ptr
const auto copy_array = [&] (const IColumn::Offset_t * offset_ptr) const auto copy_array = [&] (const IColumn::Offset_t * offset_ptr)
{ {
@ -96,6 +95,11 @@ void filterArraysImpl(
memcpy(&res_elems[elems_size_old], &src_elems[offset], size * sizeof(T)); memcpy(&res_elems[elems_size_old], &src_elems[offset], size * sizeof(T));
}; };
#if defined(__x86_64__)
const __m128i zero_vec = _mm_setzero_si128();
static constexpr size_t SIMD_BYTES = 16;
const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_aligned) while (filt_pos < filt_end_aligned)
{ {
const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8( const auto mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
@ -149,6 +153,7 @@ void filterArraysImpl(
filt_pos += SIMD_BYTES; filt_pos += SIMD_BYTES;
offsets_pos += SIMD_BYTES; offsets_pos += SIMD_BYTES;
} }
#endif
while (filt_pos < filt_end) while (filt_pos < filt_end)
{ {

View File

@ -12,7 +12,9 @@
#include <DB/IO/WriteHelpers.h> #include <DB/IO/WriteHelpers.h>
#include <DB/IO/VarInt.h> #include <DB/IO/VarInt.h>
#include <emmintrin.h> #if defined(__x86_64__)
#include <emmintrin.h>
#endif
namespace DB namespace DB
@ -91,6 +93,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column
if (size) if (size)
{ {
#if defined(__x86_64__)
/// Оптимистичная ветка, в которой возможно более эффективное копирование. /// Оптимистичная ветка, в которой возможно более эффективное копирование.
if (offset + 16 * UNROLL_TIMES <= data.capacity() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end()) if (offset + 16 * UNROLL_TIMES <= data.capacity() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end())
{ {
@ -121,6 +124,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column
istr.position() += size; istr.position() += size;
} }
else else
#endif
{ {
istr.readStrict(reinterpret_cast<char*>(&data[offset - size - 1]), size); istr.readStrict(reinterpret_cast<char*>(&data[offset - size - 1]), size);
} }

View File

@ -1,4 +1,6 @@
#include <emmintrin.h> #if defined(__x86_64__)
#include <emmintrin.h>
#endif
#include <sstream> #include <sstream>
@ -112,6 +114,7 @@ void readStringUntilEOF(String & s, ReadBuffer & buf)
*/ */
static inline const char * find_first_tab_lf_or_backslash(const char * begin, const char * end) static inline const char * find_first_tab_lf_or_backslash(const char * begin, const char * end)
{ {
#if defined(__x86_64__)
static const char tab_chars[16] = {'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'}; static const char tab_chars[16] = {'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'};
static const char lf_chars[16] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; static const char lf_chars[16] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'}; static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'};
@ -140,6 +143,7 @@ static inline const char * find_first_tab_lf_or_backslash(const char * begin, co
if (bit_mask) if (bit_mask)
return begin + __builtin_ctz(bit_mask); return begin + __builtin_ctz(bit_mask);
} }
#endif
for (; begin < end; ++begin) for (; begin < end; ++begin)
if (*begin == '\t' || *begin == '\n' || *begin == '\\') if (*begin == '\t' || *begin == '\n' || *begin == '\\')
@ -202,6 +206,7 @@ void readEscapedString(DB::String & s, DB::ReadBuffer & buf)
template <char quote> template <char quote>
static inline const char * find_first_quote_or_backslash(const char * begin, const char * end) static inline const char * find_first_quote_or_backslash(const char * begin, const char * end)
{ {
#if defined(__x86_64__)
static const char quote_chars[16] = {quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote}; static const char quote_chars[16] = {quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote, quote};
static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
@ -226,6 +231,7 @@ static inline const char * find_first_quote_or_backslash(const char * begin, con
if (bit_mask) if (bit_mask)
return begin + __builtin_ctz(bit_mask); return begin + __builtin_ctz(bit_mask);
} }
#endif
for (; begin < end; ++begin) for (; begin < end; ++begin)
if (*begin == quote || *begin == '\\') if (*begin == quote || *begin == '\\')

View File

@ -1,4 +1,6 @@
#include <emmintrin.h> #if defined(__x86_64__)
#include <emmintrin.h>
#endif
#include <string> #include <string>
#include <iostream> #include <iostream>
@ -35,6 +37,7 @@ namespace test
*/ */
static inline const char * find_first_tab_lf_or_backslash(const char * begin, const char * end) static inline const char * find_first_tab_lf_or_backslash(const char * begin, const char * end)
{ {
#if defined(__x86_64__)
static const char tab_chars[16] = {'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'}; static const char tab_chars[16] = {'\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t', '\t'};
static const char lf_chars[16] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; static const char lf_chars[16] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'}; static const char bs_chars[16] = {'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'};
@ -63,7 +66,7 @@ namespace test
if (bit_mask) if (bit_mask)
return begin + __builtin_ctz(bit_mask); return begin + __builtin_ctz(bit_mask);
} }
#endif
for (; begin < end; ++begin) for (; begin < end; ++begin)
if (*begin == '\t' || *begin == '\n' || *begin == '\\') if (*begin == '\t' || *begin == '\n' || *begin == '\\')
return begin; return begin;

View File

@ -1,4 +1,6 @@
#include <emmintrin.h> #if defined(__x86_64__)
#include <emmintrin.h>
#endif
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
@ -9,6 +11,7 @@
#include <DB/Common/Stopwatch.h> #include <DB/Common/Stopwatch.h>
#if defined(__x86_64__)
std::ostream & operator<< (std::ostream & ostr, const __m128i vec) std::ostream & operator<< (std::ostream & ostr, const __m128i vec)
{ {
char digits[16]; char digits[16];
@ -21,6 +24,7 @@ std::ostream & operator<< (std::ostream & ostr, const __m128i vec)
return ostr; return ostr;
} }
#endif
namespace test namespace test

View File

@ -15,7 +15,9 @@
#include <DB/Common/HashTable/HashMap.h> #include <DB/Common/HashTable/HashMap.h>
#include <DB/Interpreters/AggregationCommon.h> #include <DB/Interpreters/AggregationCommon.h>
#include <smmintrin.h> #if defined(__x86_64__)
#include <smmintrin.h>
#endif
/** Выполнять так: /** Выполнять так:
@ -74,11 +76,15 @@ DefineStringRef(StringRef_Compare8_1_byUInt64)
DefineStringRef(StringRef_Compare16_1_byMemcmp) DefineStringRef(StringRef_Compare16_1_byMemcmp)
DefineStringRef(StringRef_Compare16_1_byUInt64_logicAnd) DefineStringRef(StringRef_Compare16_1_byUInt64_logicAnd)
DefineStringRef(StringRef_Compare16_1_byUInt64_bitAnd) DefineStringRef(StringRef_Compare16_1_byUInt64_bitAnd)
#if defined(__x86_64__)
DefineStringRef(StringRef_Compare16_1_byIntSSE) DefineStringRef(StringRef_Compare16_1_byIntSSE)
DefineStringRef(StringRef_Compare16_1_byFloatSSE) DefineStringRef(StringRef_Compare16_1_byFloatSSE)
DefineStringRef(StringRef_Compare16_1_bySSE4) DefineStringRef(StringRef_Compare16_1_bySSE4)
DefineStringRef(StringRef_Compare16_1_bySSE4_wide) DefineStringRef(StringRef_Compare16_1_bySSE4_wide)
DefineStringRef(StringRef_Compare16_1_bySSE_wide) DefineStringRef(StringRef_Compare16_1_bySSE_wide)
#endif
DefineStringRef(StringRef_CompareAlwaysTrue) DefineStringRef(StringRef_CompareAlwaysTrue)
DefineStringRef(StringRef_CompareAlmostAlwaysTrue) DefineStringRef(StringRef_CompareAlmostAlwaysTrue)
@ -190,6 +196,8 @@ inline bool compare_byUInt64_bitAnd(const char * p1, const char * p2)
& (reinterpret_cast<const uint64_t *>(p1)[1] == reinterpret_cast<const uint64_t *>(p2)[1]); & (reinterpret_cast<const uint64_t *>(p1)[1] == reinterpret_cast<const uint64_t *>(p2)[1]);
} }
#if defined(__x86_64__)
inline bool compare_byIntSSE(const char * p1, const char * p2) inline bool compare_byIntSSE(const char * p1, const char * p2)
{ {
return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8( return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8(
@ -204,6 +212,8 @@ inline bool compare_byFloatSSE(const char * p1, const char * p2)
_mm_loadu_ps(reinterpret_cast<const float *>(p2)))); _mm_loadu_ps(reinterpret_cast<const float *>(p2))));
} }
#endif
template <bool compare(const char *, const char *)> template <bool compare(const char *, const char *)>
inline bool memequal(const char * p1, const char * p2, size_t size) inline bool memequal(const char * p1, const char * p2, size_t size)
@ -253,6 +263,8 @@ inline bool memequal(const char * p1, const char * p2, size_t size)
} }
#if defined(__x86_64__)
inline bool memequal_sse41(const char * p1, const char * p2, size_t size) inline bool memequal_sse41(const char * p1, const char * p2, size_t size)
{ {
// const char * p1_end = p1 + size; // const char * p1_end = p1 + size;
@ -483,6 +495,8 @@ inline bool memequal_sse_wide(const char * p1, const char * p2, size_t size)
return true; return true;
} }
#endif
#define Op(METHOD) \ #define Op(METHOD) \
inline bool operator==(StringRef_Compare16_1_ ## METHOD lhs, StringRef_Compare16_1_ ## METHOD rhs) \ inline bool operator==(StringRef_Compare16_1_ ## METHOD lhs, StringRef_Compare16_1_ ## METHOD rhs) \
@ -499,6 +513,9 @@ inline bool operator==(StringRef_Compare16_1_ ## METHOD lhs, StringRef_Compare16
Op(byMemcmp) Op(byMemcmp)
Op(byUInt64_logicAnd) Op(byUInt64_logicAnd)
Op(byUInt64_bitAnd) Op(byUInt64_bitAnd)
#if defined(__x86_64__)
Op(byIntSSE) Op(byIntSSE)
Op(byFloatSSE) Op(byFloatSSE)
@ -536,6 +553,8 @@ inline bool operator==(StringRef_Compare16_1_bySSE_wide lhs, StringRef_Compare16
return memequal_sse_wide(lhs.data, rhs.data, lhs.size); return memequal_sse_wide(lhs.data, rhs.data, lhs.size);
} }
#endif
inline bool operator==(StringRef_CompareAlwaysTrue lhs, StringRef_CompareAlwaysTrue rhs) inline bool operator==(StringRef_CompareAlwaysTrue lhs, StringRef_CompareAlwaysTrue rhs)
{ {
@ -623,11 +642,13 @@ int main(int argc, char ** argv)
if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp> (data, "StringRef_Compare16_1_byMemcmp"); if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp> (data, "StringRef_Compare16_1_byMemcmp");
if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd"); if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd> (data, "StringRef_Compare16_1_byUInt64_bitAnd"); if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd> (data, "StringRef_Compare16_1_byUInt64_bitAnd");
#if defined(__x86_64__)
if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE> (data, "StringRef_Compare16_1_byIntSSE"); if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE> (data, "StringRef_Compare16_1_byIntSSE");
if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE> (data, "StringRef_Compare16_1_byFloatSSE"); if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE> (data, "StringRef_Compare16_1_byFloatSSE");
if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4> (data, "StringRef_Compare16_1_bySSE4"); if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4> (data, "StringRef_Compare16_1_bySSE4");
if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide> (data, "StringRef_Compare16_1_bySSE4_wide"); if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide> (data, "StringRef_Compare16_1_bySSE4_wide");
if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide> (data, "StringRef_Compare16_1_bySSE_wide"); if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide> (data, "StringRef_Compare16_1_bySSE_wide");
#endif
if (!m || m == 100) bench<StringRef_CompareAlwaysTrue> (data, "StringRef_CompareAlwaysTrue"); if (!m || m == 100) bench<StringRef_CompareAlwaysTrue> (data, "StringRef_CompareAlwaysTrue");
if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue> (data, "StringRef_CompareAlmostAlwaysTrue"); if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue> (data, "StringRef_CompareAlmostAlwaysTrue");

View File

@ -18,7 +18,9 @@
#include <DB/Common/HashTable/HashMap.h> #include <DB/Common/HashTable/HashMap.h>
#include <DB/Interpreters/AggregationCommon.h> #include <DB/Interpreters/AggregationCommon.h>
#include <smmintrin.h> #if defined(__x86_64__)
#include <smmintrin.h>
#endif
/** Выполнять так: /** Выполнять так:
@ -137,6 +139,8 @@ struct FastHash64
}; };
#if defined(__x86_64__)
struct CrapWow struct CrapWow
{ {
size_t operator() (StringRef x) const size_t operator() (StringRef x) const
@ -206,6 +210,8 @@ struct CrapWow
} }
}; };
#endif
struct SimpleHash struct SimpleHash
{ {
@ -306,6 +312,8 @@ struct MetroHash64
}; };
#if defined(__x86_64__)
/*struct CRC32Hash /*struct CRC32Hash
{ {
size_t operator() (StringRef x) const size_t operator() (StringRef x) const
@ -383,6 +391,8 @@ struct CRC32ILPHash
} }
}; };
#endif
typedef UInt64 Value; typedef UInt64 Value;
@ -451,9 +461,13 @@ int main(int argc, char ** argv)
if (!m || m == 1) bench<StringRef_CompareMemcmp, DefaultHash<StringRef>>(data, "StringRef_CityHash64"); if (!m || m == 1) bench<StringRef_CompareMemcmp, DefaultHash<StringRef>>(data, "StringRef_CityHash64");
if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64> (data, "StringRef_FastHash64"); if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64> (data, "StringRef_FastHash64");
if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash> (data, "StringRef_SimpleHash"); if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash> (data, "StringRef_SimpleHash");
#if defined(__x86_64__)
if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow> (data, "StringRef_CrapWow"); if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow> (data, "StringRef_CrapWow");
if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash> (data, "StringRef_CRC32Hash"); if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash> (data, "StringRef_CRC32Hash");
if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash> (data, "StringRef_CRC32ILPHash"); if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash> (data, "StringRef_CRC32ILPHash");
#endif
if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash"); if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash");
if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64"); if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64");
if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1"); if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1");