From b00c66cb36ea611e750197f669c0cc702ba2d615 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 13 Apr 2021 21:53:55 +0300 Subject: [PATCH] More safe CPU dispatching --- src/Functions/CMakeLists.txt | 4 +- src/Functions/intDiv.cpp | 115 +---------------------------------- 2 files changed, 5 insertions(+), 114 deletions(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 1c3beb2e47d..7cbca175c0d 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -1,5 +1,7 @@ configure_file(config_functions.h.in ${ConfigIncludePath}/config_functions.h) +add_subdirectory(divide) + include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) add_headers_and_sources(clickhouse_functions .) @@ -25,7 +27,7 @@ target_link_libraries(clickhouse_functions PRIVATE ${ZLIB_LIBRARIES} boost::filesystem - libdivide + divide_impl ) if (OPENSSL_CRYPTO_LIBRARY) diff --git a/src/Functions/intDiv.cpp b/src/Functions/intDiv.cpp index 42b0299ce01..98ce4fe30de 100644 --- a/src/Functions/intDiv.cpp +++ b/src/Functions/intDiv.cpp @@ -1,28 +1,7 @@ #include #include -#include -#if defined(__x86_64__) - #define LIBDIVIDE_SSE2 1 - #define LIBDIVIDE_AVX2 1 - - #if defined(__clang__) - #pragma clang attribute push(__attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2"))), apply_to=function) - #else - #pragma GCC push_options - #pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native") - #endif -#endif - -#include - -#if defined(__x86_64__) - #if defined(__clang__) - #pragma clang attribute pop - #else - #pragma GCC pop_options - #endif -#endif +#include "divide/divide.h" namespace DB @@ -37,83 +16,6 @@ namespace /// Optimizations for integer division by a constant. -#if defined(__x86_64__) - -DECLARE_DEFAULT_CODE ( - template - void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) - { - libdivide::divider divider(b); - const A * a_end = a_pos + size; - - static constexpr size_t values_per_simd_register = 16 / sizeof(A); - const A * a_end_simd = a_pos + size / values_per_simd_register * values_per_simd_register; - - while (a_pos < a_end_simd) - { - _mm_storeu_si128(reinterpret_cast<__m128i *>(c_pos), - _mm_loadu_si128(reinterpret_cast(a_pos)) / divider); - - a_pos += values_per_simd_register; - c_pos += values_per_simd_register; - } - - while (a_pos < a_end) - { - *c_pos = *a_pos / divider; - ++a_pos; - ++c_pos; - } - } -) - -DECLARE_AVX2_SPECIFIC_CODE ( - template - void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) - { - libdivide::divider divider(b); - const A * a_end = a_pos + size; - - static constexpr size_t values_per_simd_register = 32 / sizeof(A); - const A * a_end_simd = a_pos + size / values_per_simd_register * values_per_simd_register; - - while (a_pos < a_end_simd) - { - _mm256_storeu_si256(reinterpret_cast<__m256i *>(c_pos), - _mm256_loadu_si256(reinterpret_cast(a_pos)) / divider); - - a_pos += values_per_simd_register; - c_pos += values_per_simd_register; - } - - while (a_pos < a_end) - { - *c_pos = *a_pos / divider; - ++a_pos; - ++c_pos; - } - } -) - -#else - -template -void divideImpl(const A * __restrict a_pos, B b, ResultType * __restrict c_pos, size_t size) -{ - libdivide::divider divider(b); - const A * a_end = a_pos + size; - - while (a_pos < a_end) - { - *c_pos = *a_pos / divider; - ++a_pos; - ++c_pos; - } -} - -#endif - - template struct DivideIntegralByConstantImpl : BinaryOperation> @@ -164,20 +66,7 @@ struct DivideIntegralByConstantImpl if (unlikely(static_cast(b) == 0)) throw Exception("Division by zero", ErrorCodes::ILLEGAL_DIVISION); -#if USE_MULTITARGET_CODE - if (isArchSupported(TargetArch::AVX2)) - { - TargetSpecific::AVX2::divideImpl(a_pos, b, c_pos, size); - } - else -#endif - { -#if __x86_64__ - TargetSpecific::Default::divideImpl(a_pos, b, c_pos, size); -#else - divideImpl(a_pos, b, c_pos, size); -#endif - } + divideImpl(a_pos, b, c_pos, size); } };