diff --git a/CMakeLists.txt b/CMakeLists.txt index 559291e8c34..06496bde59b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -46,10 +46,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") set (AARCH64 1) endif () -if (NOT AARCH64) - set (MACHINE_FLAGS "-msse4 -mpopcnt") -endif () - set (COMMON_WARNING_FLAGS "-Wall") # -Werror is also added inside directories with our own code. set (CXX_WARNING_FLAGS "-Wnon-virtual-dtor") @@ -110,14 +106,21 @@ if (PIPE) set (COMPILER_FLAGS "${COMPILER_FLAGS} -pipe") endif () +include (cmake/test_cpu.cmake) + +option (ARCHNATIVE "Enable -march=native compiler flag" OFF) +if (ARCHNATIVE) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") +endif () + set (CMAKE_BUILD_COLOR_MAKEFILE ON) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} -std=gnu++1y ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${MACHINE_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} -std=gnu++1y ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") #set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3") set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${MACHINE_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") #set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline") diff --git a/cmake/find_readline_edit.cmake b/cmake/find_readline_edit.cmake index 
3bf70c6a4a0..a3f0d4ddd71 100644 --- a/cmake/find_readline_edit.cmake +++ b/cmake/find_readline_edit.cmake @@ -62,7 +62,3 @@ check_cxx_source_runs(" append_history(1,nullptr); } " HAVE_READLINE_HISTORY) - -#if (HAVE_READLINE_HISTORY) -# add_definitions (-D HAVE_READLINE_HISTORY) -#endif ()
diff --git a/cmake/test_cpu.cmake b/cmake/test_cpu.cmake
new file mode 100644
index 00000000000..699d8fe7b92
--- /dev/null
+++ b/cmake/test_cpu.cmake
@@ -0,0 +1,80 @@
+# Probes which x86 SIMD-related compiler flags are usable and appends every
+# accepted flag to COMPILER_FLAGS.
+#
+# Each probe only checks that a tiny program using the relevant intrinsic
+# compiles and links with the flag; nothing is executed, so the result
+# describes the compiler, not the CPU of the build host.
+#
+# Reads: COMPILER_FLAGS and AARCH64 from the including scope.
+# Sets:  HAVE_SSE41, HAVE_SSE42, HAVE_POPCNT and the extended COMPILER_FLAGS.
+#
+# Intrinsics reference: https://software.intel.com/sites/landingpage/IntrinsicsGuide/
+
+include (CheckCXXSourceCompiles)
+
+# The probes overwrite CMAKE_REQUIRED_FLAGS; remember the caller's value so it
+# can be put back at the end instead of being reset to an empty string.
+set (TEST_CPU_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+
+# The macros a flag predefines can be listed by diffing preprocessor dumps:
+#   gcc -dM -E -mno-sse2 - < /dev/null | sort > gcc-dump-nosse2
+#   gcc -dM -E -msse2 - < /dev/null | sort > gcc-dump-sse2
+# -msse2 adds:
+#   #define __SSE2__ 1
+#   #define __SSE2_MATH__ 1
+
+# SSE4.1: gcc -dM -E -msse4.1 - < /dev/null | sort > gcc-dump-sse41
+#   #define __SSE4_1__ 1
+
+set (TEST_FLAG "-msse4.1")
+set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
+check_cxx_source_compiles("
+    #include <smmintrin.h>
+    int main() {
+        _mm_insert_epi8(__m128i(), 0, 0);
+        return 0;
+    }
+" HAVE_SSE41)
+if (HAVE_SSE41)
+    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
+endif ()
+
+
+# SSE4.2: gcc -dM -E -msse4.2 - < /dev/null | sort > gcc-dump-sse42
+#   #define __SSE4_2__ 1
+
+set (TEST_FLAG "-msse4.2")
+set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
+check_cxx_source_compiles("
+    #include <nmmintrin.h>
+    int main() {
+        _mm_crc32_u64(0, 0);
+        return 0;
+    }
+" HAVE_SSE42)
+if (HAVE_SSE42)
+    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
+endif ()
+
+
+# POPCNT: gcc -dM -E -mpopcnt - < /dev/null | sort > gcc-dump-popcnt
+#   #define __POPCNT__ 1
+
+set (TEST_FLAG "-mpopcnt")
+set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG}")
+check_cxx_source_compiles("
+    int main() {
+        __builtin_popcountll(0);
+        return 0;
+    }
+" HAVE_POPCNT)
+# Never add the flag on AArch64 builds, even if the probe succeeded.
+if (HAVE_POPCNT AND NOT AARCH64)
+    set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
+endif ()
+
+# Hand CMAKE_REQUIRED_FLAGS back to the caller unchanged.
+set (CMAKE_REQUIRED_FLAGS "${TEST_CPU_SAVED_REQUIRED_FLAGS}")
+unset (TEST_CPU_SAVED_REQUIRED_FLAGS)
+
+# TODO: add an SSE3 probe here if it is ever needed.
diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h index 
6359b253525..8fbf920f02a 100644 --- a/dbms/include/DB/Columns/ColumnVector.h +++ b/dbms/include/DB/Columns/ColumnVector.h @@ -12,7 +12,7 @@ #include -#if defined(__x86_64__) +#if __SSE2__ #include #endif @@ -357,7 +357,7 @@ public: const UInt8 * filt_end = filt_pos + size; const T * data_pos = &data[0]; -#if defined(__x86_64__) +#if __SSE2__ /** Чуть более оптимизированная версия. * Исходит из допущения, что часто куски последовательно идущих значений * полностью проходят или полностью не проходят фильтр. diff --git a/dbms/include/DB/Common/BlockFilterCreator.h b/dbms/include/DB/Common/BlockFilterCreator.h index aa17ec04953..75a6c736cf9 100644 --- a/dbms/include/DB/Common/BlockFilterCreator.h +++ b/dbms/include/DB/Common/BlockFilterCreator.h @@ -5,7 +5,7 @@ #include -#if defined(__x86_64__) +#if __SSE2__ #define LIBDIVIDE_USE_SSE2 1 #endif diff --git a/dbms/include/DB/Common/StringSearcher.h b/dbms/include/DB/Common/StringSearcher.h index 5540aeb3abe..94b1933897d 100644 --- a/dbms/include/DB/Common/StringSearcher.h +++ b/dbms/include/DB/Common/StringSearcher.h @@ -7,7 +7,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE4_1__ #include #endif @@ -60,7 +60,7 @@ private: UInt8 l{}; UInt8 u{}; -#if defined(__x86_64__) +#if __SSE4_1__ /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol __m128i patl, patu; /// lower and uppercase vectors of first 16 characters of `needle` @@ -99,7 +99,7 @@ public: u = u_seq[0]; } -#if defined(__x86_64__) +#if __SSE4_1__ /// for detecting leftmost position of the first symbol patl = _mm_set1_epi8(l); patu = _mm_set1_epi8(u); @@ -160,7 +160,7 @@ public: { static const Poco::UTF8Encoding utf8; -#if defined(__x86_64__) +#if __SSE4_1__ if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -225,7 +225,7 @@ public: while (haystack < haystack_end) { -#if defined(__x86_64__) +#if __SSE4_1__ if (haystack + n <= haystack_end && page_safe(haystack)) { const auto 
v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); @@ -330,7 +330,7 @@ private: UInt8 l{}; UInt8 u{}; -#if defined(__x86_64__) +#if __SSE4_1__ /// vectors filled with `l` and `u`, for determining leftmost position of the first symbol __m128i patl, patu; /// lower and uppercase vectors of first 16 characters of `needle` @@ -348,7 +348,7 @@ public: l = static_cast(std::tolower(*needle)); u = static_cast(std::toupper(*needle)); -#if defined(__x86_64__) +#if __SSE4_1__ patl = _mm_set1_epi8(l); patu = _mm_set1_epi8(u); @@ -372,7 +372,7 @@ public: bool compare(const UInt8 * pos) const { -#if defined(__x86_64__) +#if __SSE4_1__ if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -424,7 +424,7 @@ public: while (haystack < haystack_end) { -#if defined(__x86_64__) +#if __SSE4_1__ if (haystack + n <= haystack_end && page_safe(haystack)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(haystack)); @@ -516,7 +516,7 @@ private: /// first character in `needle` UInt8 first{}; -#if defined(__x86_64__) +#if __SSE4_1__ /// vector filled `first` for determining leftmost position of the first symbol __m128i pattern; /// vector of first 16 characters of `needle` @@ -533,7 +533,7 @@ public: first = *needle; -#if defined(__x86_64__) +#if __SSE4_1__ pattern = _mm_set1_epi8(first); auto needle_pos = needle; @@ -554,7 +554,7 @@ public: bool compare(const UInt8 * pos) const { -#if defined(__x86_64__) +#if __SSE4_1__ if (page_safe(pos)) { const auto v_haystack = _mm_loadu_si128(reinterpret_cast(pos)); @@ -604,7 +604,7 @@ public: while (haystack < haystack_end) { -#if defined(__x86_64__) +#if __SSE4_1__ if (haystack + n <= haystack_end && page_safe(haystack)) { /// find first character diff --git a/dbms/include/DB/Common/memcpySmall.h b/dbms/include/DB/Common/memcpySmall.h index 5828c046a40..b35a6343294 100644 --- a/dbms/include/DB/Common/memcpySmall.h +++ b/dbms/include/DB/Common/memcpySmall.h @@ -3,9 +3,7 @@ #include #include - -#if 
defined(__x86_64__) - +#if __SSE2__ #include diff --git a/dbms/include/DB/Core/StringRef.h b/dbms/include/DB/Core/StringRef.h index 6b8e34b2a29..93f2184f384 100644 --- a/dbms/include/DB/Core/StringRef.h +++ b/dbms/include/DB/Core/StringRef.h @@ -5,7 +5,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE2__ #include #endif @@ -35,7 +35,7 @@ using StringRefs = std::vector; using UInt64 = DB::UInt64; -#if defined(__x86_64__) +#if __SSE2__ /** Сравнение строк на равенство. * Подход является спорным и выигрывает не во всех случаях. @@ -128,7 +128,7 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if defined(__x86_64__) +#if __SSE2__ return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); @@ -169,7 +169,7 @@ struct StringRefHash64 } }; -#if defined(__x86_64__) +#if __SSE4_2__ #ifdef __SSE4_1__ #include diff --git a/dbms/include/DB/Functions/FunctionsArithmetic.h b/dbms/include/DB/Functions/FunctionsArithmetic.h index e613001b7a1..d4eca307799 100644 --- a/dbms/include/DB/Functions/FunctionsArithmetic.h +++ b/dbms/include/DB/Functions/FunctionsArithmetic.h @@ -943,7 +943,7 @@ template <> struct FunctionUnaryArithmeticMonotonicity /// Оптимизации для целочисленного деления на константу. 
-#if defined(__x86_64__) +#if __SSE2__ #define LIBDIVIDE_USE_SSE2 1 #endif @@ -981,7 +981,7 @@ struct DivideIntegralByConstantImpl const A * a_end = a_pos + size; ResultType * c_pos = &c[0]; -#if defined(__x86_64__) +#if __SSE2__ static constexpr size_t values_per_sse_register = 16 / sizeof(A); const A * a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register; diff --git a/dbms/include/DB/Functions/FunctionsRound.h b/dbms/include/DB/Functions/FunctionsRound.h index ed808f47155..1b62ea5a6d6 100644 --- a/dbms/include/DB/Functions/FunctionsRound.h +++ b/dbms/include/DB/Functions/FunctionsRound.h @@ -5,7 +5,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE4_1__ #include #endif @@ -268,7 +268,7 @@ namespace DB } }; -#if defined(__x86_64__) +#if __SSE4_1__ template class BaseFloatRoundingComputation; diff --git a/dbms/include/DB/Functions/FunctionsString.h b/dbms/include/DB/Functions/FunctionsString.h index 3429bf09cf6..a2d7741a896 100644 --- a/dbms/include/DB/Functions/FunctionsString.h +++ b/dbms/include/DB/Functions/FunctionsString.h @@ -16,9 +16,8 @@ #include #include -#if defined(__x86_64__) +#if __SSE2__ #include - #include #endif @@ -232,7 +231,7 @@ private: { const auto flip_case_mask = 'A' ^ 'a'; -#if defined(__x86_64__) +#if __SSE2__ const auto bytes_sse = sizeof(__m128i); const auto src_end_sse = src_end - (src_end - src) % bytes_sse; @@ -393,7 +392,7 @@ private: static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) { -#if defined(__x86_64__) +#if __SSE2__ const auto bytes_sse = sizeof(__m128i); auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp index 2fcd7ddeb17..1acf6c5c8e8 100644 --- a/dbms/src/Columns/ColumnsCommon.cpp +++ b/dbms/src/Columns/ColumnsCommon.cpp @@ -1,4 +1,4 @@ -#if defined(__x86_64__) +#if __SSE2__ #include #endif @@ -20,7 +20,7 @@ size_t countBytesInFilter(const IColumn::Filter & filt) const 
Int8 * pos = reinterpret_cast(&filt[0]); const Int8 * end = pos + filt.size(); -#if defined(__x86_64__) +#if __SSE2__ && __POPCNT__ const __m128i zero16 = _mm_setzero_si128(); const Int8 * end64 = pos + filt.size() / 64 * 64; @@ -95,7 +95,7 @@ void filterArraysImpl( memcpy(&res_elems[elems_size_old], &src_elems[offset], size * sizeof(T)); }; -#if defined(__x86_64__) +#if __SSE2__ const __m128i zero_vec = _mm_setzero_si128(); static constexpr size_t SIMD_BYTES = 16; const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 9b6f166425a..80395786552 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -10,7 +10,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE2__ #include #endif @@ -125,7 +125,7 @@ static NO_INLINE void deserializeBinarySSE2(ColumnString::Chars_t & data, Column if (size) { -#if defined(__x86_64__) +#if __SSE2__ /// Оптимистичная ветка, в которой возможно более эффективное копирование. 
if (offset + 16 * UNROLL_TIMES <= data.allocated_size() && istr.position() + size + 16 * UNROLL_TIMES <= istr.buffer().end()) { diff --git a/dbms/src/IO/ReadHelpers.cpp b/dbms/src/IO/ReadHelpers.cpp index dbb107ce09f..0d9c609957a 100644 --- a/dbms/src/IO/ReadHelpers.cpp +++ b/dbms/src/IO/ReadHelpers.cpp @@ -1,7 +1,3 @@ -#if defined(__x86_64__) - #include -#endif - #include #include diff --git a/dbms/src/IO/WriteBufferValidUTF8.cpp b/dbms/src/IO/WriteBufferValidUTF8.cpp index ed2fdebb500..e2b7c4b1bb4 100644 --- a/dbms/src/IO/WriteBufferValidUTF8.cpp +++ b/dbms/src/IO/WriteBufferValidUTF8.cpp @@ -2,8 +2,8 @@ #include #include -#ifdef __x86_64__ -#include +#if __SSE2__ + #include #endif @@ -69,7 +69,7 @@ void WriteBufferValidUTF8::nextImpl() while (p < pos) { -#ifdef __x86_64__ +#if __SSE2__ /// Fast skip of ASCII static constexpr size_t SIMD_BYTES = 16; const char * simd_end = p + (pos - p) / SIMD_BYTES * SIMD_BYTES; diff --git a/dbms/src/IO/tests/mempbrk.cpp b/dbms/src/IO/tests/mempbrk.cpp index 537f01858d7..41a6d99e83e 100644 --- a/dbms/src/IO/tests/mempbrk.cpp +++ b/dbms/src/IO/tests/mempbrk.cpp @@ -1,7 +1,3 @@ -#if defined(__x86_64__) - #include -#endif - #include #include #include diff --git a/dbms/src/IO/tests/parse_int_perf2.cpp b/dbms/src/IO/tests/parse_int_perf2.cpp index 427b261f03c..691e73eda05 100644 --- a/dbms/src/IO/tests/parse_int_perf2.cpp +++ b/dbms/src/IO/tests/parse_int_perf2.cpp @@ -1,7 +1,3 @@ -#if defined(__x86_64__) - #include -#endif - #include #include @@ -11,22 +7,6 @@ #include -#if defined(__x86_64__) -std::ostream & operator<< (std::ostream & ostr, const __m128i vec) -{ - char digits[16]; - _mm_store_si128(reinterpret_cast<__m128i *>(digits), vec); - - ostr << "{"; - for (size_t i = 0; i < 16; ++i) - ostr << (i ? 
", " : "") << static_cast(digits[i]); - ostr << "}"; - - return ostr; -} -#endif - - namespace test { template diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index a121d80cc01..f060867343d 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -15,7 +15,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE4_1__ #include #endif @@ -77,7 +77,7 @@ DefineStringRef(StringRef_Compare16_1_byMemcmp) DefineStringRef(StringRef_Compare16_1_byUInt64_logicAnd) DefineStringRef(StringRef_Compare16_1_byUInt64_bitAnd) -#if defined(__x86_64__) +#if __SSE4_1__ DefineStringRef(StringRef_Compare16_1_byIntSSE) DefineStringRef(StringRef_Compare16_1_byFloatSSE) DefineStringRef(StringRef_Compare16_1_bySSE4) @@ -196,7 +196,7 @@ inline bool compare_byUInt64_bitAnd(const char * p1, const char * p2) & (reinterpret_cast(p1)[1] == reinterpret_cast(p2)[1]); } -#if defined(__x86_64__) +#if __SSE4_1__ inline bool compare_byIntSSE(const char * p1, const char * p2) { @@ -263,7 +263,7 @@ inline bool memequal(const char * p1, const char * p2, size_t size) } -#if defined(__x86_64__) +#if __SSE4_1__ inline bool memequal_sse41(const char * p1, const char * p2, size_t size) { @@ -514,7 +514,7 @@ Op(byMemcmp) Op(byUInt64_logicAnd) Op(byUInt64_bitAnd) -#if defined(__x86_64__) +#if __SSE4_1__ Op(byIntSSE) Op(byFloatSSE) @@ -642,7 +642,7 @@ int main(int argc, char ** argv) if (!m || m == 5) bench (data, "StringRef_Compare16_1_byMemcmp"); if (!m || m == 6) bench(data, "StringRef_Compare16_1_byUInt64_logicAnd"); if (!m || m == 7) bench (data, "StringRef_Compare16_1_byUInt64_bitAnd"); -#if defined(__x86_64__) +#if __SSE4_1__ if (!m || m == 8) bench (data, "StringRef_Compare16_1_byIntSSE"); if (!m || m == 9) bench (data, "StringRef_Compare16_1_byFloatSSE"); if (!m || m == 10) bench (data, "StringRef_Compare16_1_bySSE4"); diff --git 
a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index f05a427642c..8712b8817c2 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -18,7 +18,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE4_1__ #include #endif @@ -139,7 +139,7 @@ struct FastHash64 }; -#if defined(__x86_64__) +#if __SSE4_1__ struct CrapWow { @@ -229,7 +229,7 @@ struct SimpleHash if (size < 8) { -#if defined(__x86_64__) +#if __SSE4_1__ return hashLessThan8(x.data, x.size); #endif } @@ -266,7 +266,7 @@ struct VerySimpleHash if (size < 8) { -#if defined(__x86_64__) +#if __SSE4_1__ return hashLessThan8(x.data, x.size); #endif } @@ -316,7 +316,7 @@ struct MetroHash64 }; -#if defined(__x86_64__) +#if __SSE4_1__ /*struct CRC32Hash { @@ -466,7 +466,7 @@ int main(int argc, char ** argv) if (!m || m == 2) bench (data, "StringRef_FastHash64"); if (!m || m == 3) bench (data, "StringRef_SimpleHash"); -#if defined(__x86_64__) +#if __SSE4_1__ if (!m || m == 4) bench (data, "StringRef_CrapWow"); if (!m || m == 5) bench (data, "StringRef_CRC32Hash"); if (!m || m == 6) bench (data, "StringRef_CRC32ILPHash"); diff --git a/dbms/src/Interpreters/tests/hash_map_string_small.cpp b/dbms/src/Interpreters/tests/hash_map_string_small.cpp index 534dd25102b..e4fd8661085 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_small.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_small.cpp @@ -64,7 +64,7 @@ inline bool operator==(SmallStringRef lhs, SmallStringRef rhs) if (lhs.size == 0) return true; -#if __x86_64__ +#if __SSE2__ return memequalSSE2Wide(lhs.data(), rhs.data(), lhs.size); #else return false; diff --git a/libs/libcommon/include/common/find_first_symbols.h b/libs/libcommon/include/common/find_first_symbols.h index 52ae6951ea6..33c9a96cf77 100644 --- a/libs/libcommon/include/common/find_first_symbols.h +++ b/libs/libcommon/include/common/find_first_symbols.h @@ -1,6 +1,9 @@ 
#pragma once -#if defined(__x86_64__) +#if __SSE2__ + #include +#endif +#if __SSE4_2__ #include #endif @@ -35,7 +38,7 @@ inline bool is_in(char x) return x == s0 || is_in(x); } -#if defined(__x86_64__) +#if __SSE2__ template inline __m128i mm_is_in(__m128i bytes) { @@ -56,7 +59,7 @@ inline __m128i mm_is_in(__m128i bytes) template inline const char * find_first_symbols_sse2(const char * begin, const char * end) { -#if defined(__x86_64__) +#if __SSE2__ for (; begin + 15 < end; begin += 16) { __m128i bytes = _mm_loadu_si128(reinterpret_cast(begin)); @@ -83,7 +86,7 @@ template inline const char * find_first_symbols_sse42_impl(const char * begin, const char * end) { -#if defined(__x86_64__) +#if __SSE4_2__ #define MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT) __m128i set = _mm_setr_epi8(c01, c02, c03, c04, c05, c06, c07, c08, c09, c10, c11, c12, c13, c14, c15, c16); @@ -131,8 +134,10 @@ inline const char * find_first_symbols_sse42(const char * begin, const char * en template inline const char * find_first_symbols(const char * begin, const char * end) { +#if __SSE4_2__ if (sizeof...(symbols) >= 5) return detail::find_first_symbols_sse42(begin, end); else +#endif return detail::find_first_symbols_sse2(begin, end); } diff --git a/utils/fill-factor/main.cpp b/utils/fill-factor/main.cpp index f5da10bbb96..a924848a035 100644 --- a/utils/fill-factor/main.cpp +++ b/utils/fill-factor/main.cpp @@ -1,7 +1,7 @@ #include #include -#if defined(__x86_64__) +#if __SSE2__ #include #endif @@ -15,7 +15,7 @@ int main(int argc, char ** argv) { -#if defined(__x86_64__) +#if __SSE2__ try { DB::ReadBufferFromFileDescriptor in(STDIN_FILENO);