From e1def723f8dea97b5fe71c09fcf15131e539d48c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 5 Sep 2022 12:04:10 +0000 Subject: [PATCH 1/8] Add special x86-SSE2-only build --- .github/workflows/master.yml | 48 ++++++++++++++++++++++++++++++ .github/workflows/pull_request.yml | 46 ++++++++++++++++++++++++++++ cmake/cpu_features.cmake | 17 +++++++++++ docker/packager/packager | 4 +++ tests/ci/ci_config.py | 11 +++++++ 5 files changed, 126 insertions(+) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index e8e3deceef5..d3a303eb7ab 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -923,6 +923,53 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAmd64SSE2: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_amd64sse2 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1011,6 +1058,7 @@ jobs: - BuilderBinFreeBSD # - BuilderBinGCC - BuilderBinPPC64 + - BuilderBinAmd64SSE2 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 7901008a7db..c100b079ed5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -935,6 +935,51 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAmd64SSE2: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_amd64sse2 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1023,6 +1068,7 @@ jobs: - BuilderBinFreeBSD # - BuilderBinGCC - BuilderBinPPC64 + - BuilderBinAmd64SSE2 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 1fc3c2db804..218b4deedce 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -24,6 +24,23 @@ option (ENABLE_BMI "Use BMI instructions on x86_64" 0) option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) +# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware. +option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0) +if (NO_SSE3_OR_HIGHER) + SET(ENABLE_SSSE3 0) + SET(ENABLE_SSE41 0) + SET(ENABLE_SSE42 0) + SET(ENABLE_PCLMULQDQ 0) + SET(ENABLE_POPCNT 0) + SET(ENABLE_AVX 0) + SET(ENABLE_AVX2 0) + SET(ENABLE_AVX512 0) + SET(ENABLE_AVX512_VBMI 0) + SET(ENABLE_BMI 0) + SET(ENABLE_AVX2_FOR_SPEC_OP 0) + SET(ENABLE_AVX512_FOR_SPEC_OP 0) +endif() + option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0) if (ARCH_NATIVE) diff --git a/docker/packager/packager b/docker/packager/packager index 66eb568d460..f878444d4bc 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -190,6 +190,9 @@ def parse_env_variables( cc = compiler result.append("DEB_ARCH=amd64") + if compiler.endswith("-amd64sse2"): + cmake_flags.append('-DNO_SSE3_OR_HIGHER=1') + cxx = cc.replace("gcc", "g++").replace("clang", "clang++") if package_type == "deb": @@ -339,6 +342,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", + "clang-14-amd64sse2", "clang-14-freebsd", "gcc-11", ), diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 3d0513bca47..b49e91a9e79 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -161,6 +161,16 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, + "binary_amd64sse2": { + "compiler": "clang-14-amd64sse2", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "static_binary_name": "amd64sse2", + "libraries": "static", + "tidy": "disable", + "with_coverage": False, + }, }, "builds_report_config": { "ClickHouse build check": [ @@ -182,6 +192,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", + "binary_amd64sse2", ], }, "tests_config": { From 438ed368a1e56af77367b42f411b099992c5ba38 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 5 Sep 2022 17:49:00 +0000 Subject: [PATCH 2/8] fix: correct compiler parsing --- docker/packager/packager | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index f878444d4bc..363be9ab2dd 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -130,6 +130,7 @@ def parse_env_variables( ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" + AMD64_SSE2_SUFFIX = "-amd64sse2" result = [] result.append("OUTPUT_DIR=/output") @@ -141,6 +142,7 @@ def parse_env_variables( is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) + is_amd64_sse2 = compiler.endswidth(AMD64_SSE2_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] @@ -186,13 +188,13 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) + elif is_amd64_sse2: + cc = compiler[: -len(AMD64_SSE2_SUFFIX)] + result.append("DEB_ARCH=amd64") else: cc = compiler result.append("DEB_ARCH=amd64") - if compiler.endswith("-amd64sse2"): - cmake_flags.append('-DNO_SSE3_OR_HIGHER=1') - cxx = cc.replace("gcc", "g++").replace("clang", "clang++") if package_type == "deb": From 1ebcc3a14ee7bd60ea5819648fb5c44360e1e07c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 6 Sep 2022 07:41:37 +0000 Subject: [PATCH 3/8] fix: endswidth --> endswith --- docker/packager/packager | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/packager b/docker/packager/packager index 363be9ab2dd..de41b23acb0 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -142,7 +142,7 @@ def parse_env_variables( is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) - is_amd64_sse2 = compiler.endswidth(AMD64_SSE2_SUFFIX) + is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] From 652f1bfd19a086349723b39a13054a1b9da586d5 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 6 Sep 2022 12:18:11 +0000 Subject: [PATCH 4/8] fix: pass -DNO_SSE3_OR_HIGHER=1 from packager --- docker/packager/packager | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/packager/packager b/docker/packager/packager index de41b23acb0..591262959b4 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -191,6 +191,7 @@ def parse_env_variables( elif is_amd64_sse2: cc = compiler[: -len(AMD64_SSE2_SUFFIX)] result.append("DEB_ARCH=amd64") + cmake_flags.append("-DNO_SSE3_OR_HIGHER=1") else: cc = compiler result.append("DEB_ARCH=amd64") From cc1bd3ac362151a2e8f57025440d12b29efa5c23 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 6 Sep 2022 16:15:50 +0000 Subject: [PATCH 5/8] fix: disable vectorscan when building w/o SSE >=3 --- contrib/vectorscan-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/vectorscan-cmake/CMakeLists.txt b/contrib/vectorscan-cmake/CMakeLists.txt index bc17105be99..f9f46d9a8cf 100644 --- a/contrib/vectorscan-cmake/CMakeLists.txt +++ b/contrib/vectorscan-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ # We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan. -if (ARCH_AMD64) +if (ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER) option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES}) endif() From d054ffd1109ab93ec366a64558c7c983cd23a11e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 6 Sep 2022 18:23:19 +0000 Subject: [PATCH 6/8] fix: don't force-inline SSE3 code into generic code Force-inlining code compiled for SSE3 into "generic" (non-platform-specific) code works for standard x86 builds where everything is compiled with SSE 4.2 (and smaller). It no longer works if we compile everything only with SSE 2. --- src/Functions/GatherUtils/sliceHasImplAnyAll.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 21c80b742fd..3ca8c6be9a7 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -419,7 +419,7 @@ DECLARE_SSE42_SPECIFIC_CODE ( // SSE4.2 Int64, UInt64 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64( +inline bool sliceHasImplAnyAllImplInt64( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -495,7 +495,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64( // SSE4.2 Int32, UInt32 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32( +inline bool sliceHasImplAnyAllImplInt32( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -580,7 +580,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32( // SSE4.2 Int16, UInt16 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16( +inline bool sliceHasImplAnyAllImplInt16( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -682,7 +682,7 @@ inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16( // SSE2 Int8, UInt8 specialization template requires (std::is_same_v || std::is_same_v) -inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt8( +inline bool sliceHasImplAnyAllImplInt8( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, From bf8fed8be8cac07ef0cc0b56e05e3d6631976c27 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 7 Sep 2022 09:45:07 +0000 Subject: [PATCH 7/8] Revert "fix: don't force-inline SSE3 code into generic code" This reverts commit d054ffd1109ab93ec366a64558c7c983cd23a11e. --- src/Functions/GatherUtils/sliceHasImplAnyAll.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 3ca8c6be9a7..21c80b742fd 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -419,7 +419,7 @@ DECLARE_SSE42_SPECIFIC_CODE ( // SSE4.2 Int64, UInt64 specialization template requires (std::is_same_v || std::is_same_v) -inline bool sliceHasImplAnyAllImplInt64( +inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt64( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -495,7 +495,7 @@ inline bool sliceHasImplAnyAllImplInt64( // SSE4.2 Int32, UInt32 specialization template requires (std::is_same_v || std::is_same_v) -inline bool sliceHasImplAnyAllImplInt32( +inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt32( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -580,7 +580,7 @@ inline bool sliceHasImplAnyAllImplInt32( // SSE4.2 Int16, UInt16 specialization template requires (std::is_same_v || std::is_same_v) -inline bool sliceHasImplAnyAllImplInt16( +inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt16( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, @@ -682,7 +682,7 @@ inline bool sliceHasImplAnyAllImplInt16( // SSE2 Int8, UInt8 specialization template requires (std::is_same_v || std::is_same_v) -inline bool sliceHasImplAnyAllImplInt8( +inline ALWAYS_INLINE bool sliceHasImplAnyAllImplInt8( const NumericArraySlice & first, const NumericArraySlice & second, const UInt8 * first_null_map, From c07f234f095f575f2fb6902b007c5b736061ec48 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 7 Sep 2022 10:01:51 +0000 Subject: [PATCH 8/8] fix: disable ENABLE_MULTITARGET_CODE for SSE2 builds --- src/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fd8771c1529..3dc42746d67 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -42,6 +42,14 @@ endif () # See `src/Common/TargetSpecific.h` option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON) +if (NO_SSE3_OR_HIGHER) + # Optimized x86 code in DECLARE_*_SPECIFIC_CODE blocks (see `src/Common/TargetSpecific.h`) is sometimes marked FORCE_INLINE. As a + # result, its instruction set requirements (e.g. SSE4.2) leak into generic code. This is normally not a problem for standard x86 builds + # because generic code is compiled with SSE 4.2 anyways. But it breaks SSE2-only builds. Therefore disabling the multitarget code + # machinery and always use generic code. (The cleaner alternative is removing FORCE_INLINE but that impacts performance too much.) + set(ENABLE_MULTITARGET_CODE OFF) +endif() + if (ENABLE_MULTITARGET_CODE) add_definitions(-DENABLE_MULTITARGET_CODE=1) else()