mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Cmake: Test on having sse (#405)
* Cmake: Test on having sse * fix * wip * wip * wip * wip * wip * wip * fix * fix * fix * fix * fix * fix
This commit is contained in:
parent
cab5d44d28
commit
8b4b1534e3
@ -46,10 +46,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
|
||||
set (AARCH64 1)
|
||||
endif ()
|
||||
|
||||
if (NOT AARCH64)
|
||||
set (MACHINE_FLAGS "-msse4 -mpopcnt")
|
||||
endif ()
|
||||
|
||||
set (COMMON_WARNING_FLAGS "-Wall") # -Werror is also added inside directories with our own code.
|
||||
set (CXX_WARNING_FLAGS "-Wnon-virtual-dtor")
|
||||
|
||||
@ -110,14 +106,21 @@ if (PIPE)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -pipe")
|
||||
endif ()
|
||||
|
||||
include (cmake/test_cpu.cmake)
|
||||
|
||||
option (ARCHNATIVE "Enable -march=native compiler flag" OFF)
|
||||
if (ARCHNATIVE)
|
||||
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
|
||||
endif ()
|
||||
|
||||
|
||||
set (CMAKE_BUILD_COLOR_MAKEFILE ON)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} -std=gnu++1y ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${MACHINE_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} -std=gnu++1y ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}")
|
||||
#set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3")
|
||||
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline")
|
||||
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${MACHINE_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}")
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}")
|
||||
#set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
|
||||
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3")
|
||||
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline")
|
||||
|
@ -62,7 +62,3 @@ check_cxx_source_runs("
|
||||
append_history(1,nullptr);
|
||||
}
|
||||
" HAVE_READLINE_HISTORY)
|
||||
|
||||
#if (HAVE_READLINE_HISTORY)
|
||||
# add_definitions (-D HAVE_READLINE_HISTORY)
|
||||
#endif ()
|
||||
|
63
cmake/test_cpu.cmake
Normal file
63
cmake/test_cpu.cmake
Normal file
@ -0,0 +1,63 @@
|
||||
# Probe which x86 instruction-set extensions the C++ compiler can target and
# append the matching -m<flag> options to COMPILER_FLAGS.
#
# Every probe is compile-only: it verifies that the compiler accepts the flag
# and that a representative intrinsic compiles with it. Nothing is executed,
# so the outcome describes the toolchain, not the CPU of the build host.
#
# Outputs:
#   HAVE_SSE41, HAVE_SSE42, HAVE_POPCNT - results of the individual probes
#   COMPILER_FLAGS                      - extended with every accepted flag
#   TEST_FLAG                           - left holding the last probed flag
#
# Intrinsics reference: https://software.intel.com/sites/landingpage/IntrinsicsGuide/

include (CheckCXXSourceCompiles)

# The probes below need CMAKE_REQUIRED_FLAGS for themselves. Remember whatever
# the including project had set so it can be put back afterwards instead of
# being silently wiped.
set (TEST_CPU_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")

# To see which predefined macros a given flag switches on, diff these dumps:
#   gcc -dM -E -mno-sse2 - < /dev/null | sort > gcc-dump-nosse2
#   gcc -dM -E -msse2 - < /dev/null | sort > gcc-dump-sse2
# -msse2 adds:
#   #define __SSE2__ 1
#   #define __SSE2_MATH__ 1

# --- SSE 4.1 ---
#   gcc -dM -E -msse4.1 - < /dev/null | sort > gcc-dump-sse41
# -msse4.1 adds:
#   #define __SSE4_1__ 1
set (TEST_FLAG "-msse4.1")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
check_cxx_source_compiles("
    #include <smmintrin.h>
    int main() {
        _mm_insert_epi8(__m128i(), 0, 0);
        return 0;
    }
" HAVE_SSE41)
if (HAVE_SSE41)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()

# --- SSE 4.2 ---
#   gcc -dM -E -msse4.2 - < /dev/null | sort > gcc-dump-sse42
# -msse4.2 adds:
#   #define __SSE4_2__ 1
set (TEST_FLAG "-msse4.2")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
check_cxx_source_compiles("
    #include <nmmintrin.h>
    int main() {
        _mm_crc32_u64(0, 0);
        return 0;
    }
" HAVE_SSE42)
if (HAVE_SSE42)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()

# --- POPCNT ---
#   gcc -dM -E -mpopcnt - < /dev/null | sort > gcc-dump-popcnt
# -mpopcnt adds:
#   #define __POPCNT__ 1
set (TEST_FLAG "-mpopcnt")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
check_cxx_source_compiles("
    int main() {
        __builtin_popcountll(0);
        return 0;
    }
" HAVE_POPCNT)
# __builtin_popcountll is a compiler builtin, not an x86 intrinsic, so this
# probe mainly checks that the flag itself is accepted. The flag is never
# added to COMPILER_FLAGS when AARCH64 is set.
if (HAVE_POPCNT AND NOT AARCH64)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()

# Hand CMAKE_REQUIRED_FLAGS back to the including project unchanged.
set (CMAKE_REQUIRED_FLAGS "${TEST_CPU_SAVED_REQUIRED_FLAGS}")
unset (TEST_CPU_SAVED_REQUIRED_FLAGS)

# TODO: add an SSE3 test here if you want to use it.
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include <DB/Columns/IColumn.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -357,7 +357,7 @@ public:
|
||||
const UInt8 * filt_end = filt_pos + size;
|
||||
const T * data_pos = &data[0];
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
/** Чуть более оптимизированная версия.
|
||||
* Исходит из допущения, что часто куски последовательно идущих значений
|
||||
* полностью проходят или полностью не проходят фильтр.
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#define LIBDIVIDE_USE_SSE2 1
|
||||
#endif
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -60,7 +60,7 @@ private:
|
||||
UInt8 l{};
|
||||
UInt8 u{};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
|
||||
__m128i patl, patu;
|
||||
/// lower and uppercase vectors of first 16 characters of `needle`
|
||||
@ -99,7 +99,7 @@ public:
|
||||
u = u_seq[0];
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
/// for detecting leftmost position of the first symbol
|
||||
patl = _mm_set1_epi8(l);
|
||||
patu = _mm_set1_epi8(u);
|
||||
@ -160,7 +160,7 @@ public:
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
@ -225,7 +225,7 @@ public:
|
||||
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -330,7 +330,7 @@ private:
|
||||
UInt8 l{};
|
||||
UInt8 u{};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
/// vectors filled with `l` and `u`, for determining leftmost position of the first symbol
|
||||
__m128i patl, patu;
|
||||
/// lower and uppercase vectors of first 16 characters of `needle`
|
||||
@ -348,7 +348,7 @@ public:
|
||||
l = static_cast<UInt8>(std::tolower(*needle));
|
||||
u = static_cast<UInt8>(std::toupper(*needle));
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
patl = _mm_set1_epi8(l);
|
||||
patu = _mm_set1_epi8(u);
|
||||
|
||||
@ -372,7 +372,7 @@ public:
|
||||
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
@ -424,7 +424,7 @@ public:
|
||||
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
|
||||
@ -516,7 +516,7 @@ private:
|
||||
/// first character in `needle`
|
||||
UInt8 first{};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
/// vector filled `first` for determining leftmost position of the first symbol
|
||||
__m128i pattern;
|
||||
/// vector of first 16 characters of `needle`
|
||||
@ -533,7 +533,7 @@ public:
|
||||
|
||||
first = *needle;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
pattern = _mm_set1_epi8(first);
|
||||
|
||||
auto needle_pos = needle;
|
||||
@ -554,7 +554,7 @@ public:
|
||||
|
||||
bool compare(const UInt8 * pos) const
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (page_safe(pos))
|
||||
{
|
||||
const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
|
||||
@ -604,7 +604,7 @@ public:
|
||||
|
||||
while (haystack < haystack_end)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (haystack + n <= haystack_end && page_safe(haystack))
|
||||
{
|
||||
/// find first character
|
||||
|
@ -3,9 +3,7 @@
|
||||
#include <string.h>
|
||||
#include <DB/Core/Defines.h>
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <functional>
|
||||
#include <ostream>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -35,7 +35,7 @@ using StringRefs = std::vector<StringRef>;
|
||||
|
||||
using UInt64 = DB::UInt64;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
|
||||
/** Сравнение строк на равенство.
|
||||
* Подход является спорным и выигрывает не во всех случаях.
|
||||
@ -128,7 +128,7 @@ inline bool operator== (StringRef lhs, StringRef rhs)
|
||||
if (lhs.size == 0)
|
||||
return true;
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
return memequalSSE2Wide(lhs.data, rhs.data, lhs.size);
|
||||
#else
|
||||
return 0 == memcmp(lhs.data, rhs.data, lhs.size);
|
||||
@ -169,7 +169,7 @@ struct StringRefHash64
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_2__
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
|
@ -943,7 +943,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity<NameBitNot>
|
||||
|
||||
/// Оптимизации для целочисленного деления на константу.
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#define LIBDIVIDE_USE_SSE2 1
|
||||
#endif
|
||||
|
||||
@ -981,7 +981,7 @@ struct DivideIntegralByConstantImpl
|
||||
const A * a_end = a_pos + size;
|
||||
ResultType * c_pos = &c[0];
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
static constexpr size_t values_per_sse_register = 16 / sizeof(A);
|
||||
const A * a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register;
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <type_traits>
|
||||
#include <array>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -268,7 +268,7 @@ namespace DB
|
||||
}
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
template <typename T>
|
||||
class BaseFloatRoundingComputation;
|
||||
|
||||
|
@ -16,9 +16,8 @@
|
||||
#include <DB/Functions/IFunction.h>
|
||||
#include <ext/range.hpp>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
@ -232,7 +231,7 @@ private:
|
||||
{
|
||||
const auto flip_case_mask = 'A' ^ 'a';
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
const auto bytes_sse = sizeof(__m128i);
|
||||
const auto src_end_sse = src_end - (src_end - src) % bytes_sse;
|
||||
|
||||
@ -393,7 +392,7 @@ private:
|
||||
|
||||
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
const auto bytes_sse = sizeof(__m128i);
|
||||
auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -20,7 +20,7 @@ size_t countBytesInFilter(const IColumn::Filter & filt)
|
||||
const Int8 * pos = reinterpret_cast<const Int8 *>(&filt[0]);
|
||||
const Int8 * end = pos + filt.size();
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__ && __POPCNT__
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
const Int8 * end64 = pos + filt.size() / 64 * 64;
|
||||
|
||||
@ -95,7 +95,7 @@ void filterArraysImpl(
|
||||
memcpy(&res_elems[elems_size_old], &src_elems[offset], size * sizeof(T));
|
||||
};
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
const __m128i zero_vec = _mm_setzero_si128();
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <DB/IO/WriteHelpers.h>
|
||||
#include <DB/IO/VarInt.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -125,7 +125,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column
|
||||
|
||||
if (size)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
/// Оптимистичная ветка, в которой возможно более эффективное копирование.
|
||||
if (offset + 16 * UNROLL_TIMES <= data.allocated_size() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end())
|
||||
{
|
||||
|
@ -1,7 +1,3 @@
|
||||
#if defined(__x86_64__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <mysqlxx/Manip.h>
|
||||
|
@ -2,8 +2,8 @@
|
||||
#include <DB/IO/WriteBufferValidUTF8.h>
|
||||
#include <DB/Core/Types.h>
|
||||
|
||||
#ifdef __x86_64__
|
||||
#include <emmintrin.h>
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ void WriteBufferValidUTF8::nextImpl()
|
||||
|
||||
while (p < pos)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
#if __SSE2__
|
||||
/// Fast skip of ASCII
|
||||
static constexpr size_t SIMD_BYTES = 16;
|
||||
const char * simd_end = p + (pos - p) / SIMD_BYTES * SIMD_BYTES;
|
||||
|
@ -1,7 +1,3 @@
|
||||
#if defined(__x86_64__)
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
@ -1,7 +1,3 @@
|
||||
#if defined(__x86_64__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
@ -11,22 +7,6 @@
|
||||
#include <DB/Common/Stopwatch.h>
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
std::ostream & operator<< (std::ostream & ostr, const __m128i vec)
|
||||
{
|
||||
char digits[16];
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(digits), vec);
|
||||
|
||||
ostr << "{";
|
||||
for (size_t i = 0; i < 16; ++i)
|
||||
ostr << (i ? ", " : "") << static_cast<int>(digits[i]);
|
||||
ostr << "}";
|
||||
|
||||
return ostr;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
namespace test
|
||||
{
|
||||
template <typename T>
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <DB/Common/HashTable/HashMap.h>
|
||||
#include <DB/Interpreters/AggregationCommon.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -77,7 +77,7 @@ DefineStringRef(StringRef_Compare16_1_byMemcmp)
|
||||
DefineStringRef(StringRef_Compare16_1_byUInt64_logicAnd)
|
||||
DefineStringRef(StringRef_Compare16_1_byUInt64_bitAnd)
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
DefineStringRef(StringRef_Compare16_1_byIntSSE)
|
||||
DefineStringRef(StringRef_Compare16_1_byFloatSSE)
|
||||
DefineStringRef(StringRef_Compare16_1_bySSE4)
|
||||
@ -196,7 +196,7 @@ inline bool compare_byUInt64_bitAnd(const char * p1, const char * p2)
|
||||
& (reinterpret_cast<const UInt64 *>(p1)[1] == reinterpret_cast<const UInt64 *>(p2)[1]);
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
|
||||
inline bool compare_byIntSSE(const char * p1, const char * p2)
|
||||
{
|
||||
@ -263,7 +263,7 @@ inline bool memequal(const char * p1, const char * p2, size_t size)
|
||||
}
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
|
||||
inline bool memequal_sse41(const char * p1, const char * p2, size_t size)
|
||||
{
|
||||
@ -514,7 +514,7 @@ Op(byMemcmp)
|
||||
Op(byUInt64_logicAnd)
|
||||
Op(byUInt64_bitAnd)
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
|
||||
Op(byIntSSE)
|
||||
Op(byFloatSSE)
|
||||
@ -642,7 +642,7 @@ int main(int argc, char ** argv)
|
||||
if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp> (data, "StringRef_Compare16_1_byMemcmp");
|
||||
if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd");
|
||||
if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd> (data, "StringRef_Compare16_1_byUInt64_bitAnd");
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE> (data, "StringRef_Compare16_1_byIntSSE");
|
||||
if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE> (data, "StringRef_Compare16_1_byFloatSSE");
|
||||
if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4> (data, "StringRef_Compare16_1_bySSE4");
|
||||
|
@ -18,7 +18,7 @@
|
||||
#include <DB/Common/HashTable/HashMap.h>
|
||||
#include <DB/Interpreters/AggregationCommon.h>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -139,7 +139,7 @@ struct FastHash64
|
||||
};
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
|
||||
struct CrapWow
|
||||
{
|
||||
@ -229,7 +229,7 @@ struct SimpleHash
|
||||
|
||||
if (size < 8)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
return hashLessThan8(x.data, x.size);
|
||||
#endif
|
||||
}
|
||||
@ -266,7 +266,7 @@ struct VerySimpleHash
|
||||
|
||||
if (size < 8)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
return hashLessThan8(x.data, x.size);
|
||||
#endif
|
||||
}
|
||||
@ -316,7 +316,7 @@ struct MetroHash64
|
||||
};
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
|
||||
/*struct CRC32Hash
|
||||
{
|
||||
@ -466,7 +466,7 @@ int main(int argc, char ** argv)
|
||||
if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64> (data, "StringRef_FastHash64");
|
||||
if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash> (data, "StringRef_SimpleHash");
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_1__
|
||||
if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow> (data, "StringRef_CrapWow");
|
||||
if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash> (data, "StringRef_CRC32Hash");
|
||||
if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash> (data, "StringRef_CRC32ILPHash");
|
||||
|
@ -64,7 +64,7 @@ inline bool operator==(SmallStringRef lhs, SmallStringRef rhs)
|
||||
if (lhs.size == 0)
|
||||
return true;
|
||||
|
||||
#if __x86_64__
|
||||
#if __SSE2__
|
||||
return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size);
|
||||
#else
|
||||
return false;
|
||||
|
@ -1,6 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#if __SSE4_2__
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -35,7 +38,7 @@ inline bool is_in(char x)
|
||||
return x == s0 || is_in<s1, tail...>(x);
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
template <char s0>
|
||||
inline __m128i mm_is_in(__m128i bytes)
|
||||
{
|
||||
@ -56,7 +59,7 @@ inline __m128i mm_is_in(__m128i bytes)
|
||||
template <char... symbols>
|
||||
inline const char * find_first_symbols_sse2(const char * begin, const char * end)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
for (; begin + 15 < end; begin += 16)
|
||||
{
|
||||
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(begin));
|
||||
@ -83,7 +86,7 @@ template <size_t num_chars,
|
||||
char c13 = 0, char c14 = 0, char c15 = 0, char c16 = 0>
|
||||
inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE4_2__
|
||||
#define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT)
|
||||
__m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16);
|
||||
|
||||
@ -131,8 +134,10 @@ inline const char * find_first_symbols_sse42(const char * begin, const char * en
|
||||
template <char... symbols>
|
||||
inline const char * find_first_symbols(const char * begin, const char * end)
|
||||
{
|
||||
#if __SSE4_2__
|
||||
if (sizeof...(symbols) >= 5)
|
||||
return detail::find_first_symbols_sse42<symbols...>(begin, end);
|
||||
else
|
||||
#endif
|
||||
return detail::find_first_symbols_sse2<symbols...>(begin, end);
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
@ -15,7 +15,7 @@
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
#if defined(__x86_64__)
|
||||
#if __SSE2__
|
||||
try
|
||||
{
|
||||
DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);
|
||||
|
Loading…
Reference in New Issue
Block a user