From dff3c0c8670239ca395067c39adfef4c5c797b04 Mon Sep 17 00:00:00 2001
From: M1eyu2018 <857037797@qq.com>
Date: Mon, 17 Apr 2023 12:33:39 +0800
Subject: [PATCH] Update HDFS: Support for erasure codes

Signed-off-by: M1eyu2018 <857037797@qq.com>
---
Notes (commentary only; text between the "---" line and the first
"diff --git" line is not part of the commit message or of the change):

  * Adds ISA-L as the submodule contrib/isa-l and builds it through the new
    contrib/isa-l-cmake/CMakeLists.txt as the target _isal, exported under
    the alias ch_contrib::isal. Configuration stops with FATAL_ERROR when
    CMAKE_ASM_NASM_COMPILER is not set after check_language(ASM_NASM).
  * Moves the contrib/libhdfs3 submodule pointer, adds 24 client sources to
    the libhdfs3 SRCS list and links _hdfs3 against ch_contrib::isal.
  * HADOOP_ISAL_LIBRARY is defined with
    target_compile_definitions(_hdfs3 PRIVATE ...) instead of a
    directory-wide add_definitions(), so the macro is scoped to _hdfs3 only.
  * When SANITIZE is "undefined", "-fno-sanitize=undefined" is removed from
    the COMPILE_OPTIONS of _isal as a whole, i.e. for its C sources as well
    as for its NASM sources.
  * NOTE(review): programs/igzip_cli.c looks like a command-line program
    rather than library code -- confirm that it belongs in _isal.

 .gitmodules                           |   3 +
 contrib/CMakeLists.txt                |   2 +
 contrib/isa-l                         |   1 +
 contrib/isa-l-cmake/CMakeLists.txt    | 186 ++++++++++++++++++++++++++
 contrib/libhdfs3                      |   2 +-
 contrib/libhdfs3-cmake/CMakeLists.txt |  27 ++++
 6 files changed, 220 insertions(+), 1 deletion(-)
 create mode 160000 contrib/isa-l
 create mode 100644 contrib/isa-l-cmake/CMakeLists.txt

diff --git a/.gitmodules b/.gitmodules
index ca55281e643..e4d63a34118 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -335,3 +335,6 @@
 [submodule "contrib/liburing"]
 	path = contrib/liburing
 	url = https://github.com/axboe/liburing
+[submodule "contrib/isa-l"]
+	path = contrib/isa-l
+	url = https://github.com/ClickHouse/isa-l.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index be3563d2c61..0ff8b550a98 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -191,6 +191,8 @@ add_contrib (google-benchmark-cmake google-benchmark)
 
 add_contrib (ulid-c-cmake ulid-c)
 
+add_contrib (isa-l-cmake isa-l)
+
 # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
diff --git a/contrib/isa-l b/contrib/isa-l
new file mode 160000
index 00000000000..9f2b68f0575
--- /dev/null
+++ b/contrib/isa-l
@@ -0,0 +1 @@
+Subproject commit 9f2b68f05752097f0f16632fc4a9a86950831efd
diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..fd0218a7b80
--- /dev/null
+++ b/contrib/isa-l-cmake/CMakeLists.txt
@@ -0,0 +1,186 @@
+set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l")
+
+# Check that a NASM assembler is available before enabling the ASM_NASM language
+include(CheckLanguage)
+check_language(ASM_NASM)
+if(NOT CMAKE_ASM_NASM_COMPILER)
+    message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
+endif()
+
+enable_language(ASM_NASM)
+
+set(ISAL_C_SRC
+    ${ISAL_SOURCE_DIR}/crc/crc_base_aliases.c
+    ${ISAL_SOURCE_DIR}/crc/crc_base.c
+    ${ISAL_SOURCE_DIR}/crc/crc64_base.c
+    ${ISAL_SOURCE_DIR}/erasure_code/ec_base.c
+    ${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c
+    ${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c
+    ${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c
+    ${ISAL_SOURCE_DIR}/igzip/adler32_base.c
+    ${ISAL_SOURCE_DIR}/igzip/encode_df.c
+    ${ISAL_SOURCE_DIR}/igzip/flatten_ll.c
+    ${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c
+    ${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c
+    ${ISAL_SOURCE_DIR}/igzip/huff_codes.c
+    ${ISAL_SOURCE_DIR}/igzip/hufftables_c.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip_base.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip_icf_base.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip_inflate.c
+    ${ISAL_SOURCE_DIR}/igzip/igzip.c
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c
+    ${ISAL_SOURCE_DIR}/programs/igzip_cli.c
+    ${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c
+    ${ISAL_SOURCE_DIR}/raid/raid_base.c
+)
+
+set(ISAL_ASM_SRC
+    ${ISAL_SOURCE_DIR}/crc/crc_multibinary.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_01.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_02.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by4.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4_02.asm
+    ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8_02.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_ieee_01.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_ieee_02.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by4.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_00.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_01.asm
+    ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by8.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by16_10.asm
+    ${ISAL_SOURCE_DIR}/crc/crc64_multibinary.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/ec_multibinary.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx2.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx512.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_sse.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_avx.asm
+    ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_sse.asm
+    ${ISAL_SOURCE_DIR}/igzip/adler32_avx2_4.asm
+    ${ISAL_SOURCE_DIR}/igzip/adler32_sse.asm
+    ${ISAL_SOURCE_DIR}/igzip/bitbuf2.asm
+    ${ISAL_SOURCE_DIR}/igzip/encode_df_04.asm
+    ${ISAL_SOURCE_DIR}/igzip/encode_df_06.asm
+    ${ISAL_SOURCE_DIR}/igzip/heap_macros.asm
+    ${ISAL_SOURCE_DIR}/igzip/huffman.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_body.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_compare_types.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_01.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_04.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_deflate_hash.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_finish.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_04.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_06.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body_h1_gr_bt.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_icf_finish.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_inflate_multibinary.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_multibinary.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_04.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_06.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_01.asm
+    ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_04.asm
+    ${ISAL_SOURCE_DIR}/igzip/lz0a_const.asm
+    ${ISAL_SOURCE_DIR}/igzip/options.asm
+    ${ISAL_SOURCE_DIR}/igzip/proc_heap.asm
+    ${ISAL_SOURCE_DIR}/igzip/rfc1951_lookup.asm
+    ${ISAL_SOURCE_DIR}/igzip/stdmac.asm
+    ${ISAL_SOURCE_DIR}/mem/mem_multibinary.asm
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx.asm
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx2.asm
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx512.asm
+    ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_sse.asm
+    ${ISAL_SOURCE_DIR}/raid/pq_check_sse.asm
+    ${ISAL_SOURCE_DIR}/raid/pq_gen_avx.asm
+    ${ISAL_SOURCE_DIR}/raid/pq_gen_avx2.asm
+    ${ISAL_SOURCE_DIR}/raid/pq_gen_avx512.asm
+    ${ISAL_SOURCE_DIR}/raid/pq_gen_sse.asm
+    ${ISAL_SOURCE_DIR}/raid/raid_multibinary.asm
+    ${ISAL_SOURCE_DIR}/raid/xor_check_sse.asm
+    ${ISAL_SOURCE_DIR}/raid/xor_gen_avx.asm
+    ${ISAL_SOURCE_DIR}/raid/xor_gen_avx512.asm
+    ${ISAL_SOURCE_DIR}/raid/xor_gen_sse.asm
+)
+
+# Adding ISA-L library target
+add_library(_isal ${ISAL_C_SRC} ${ISAL_ASM_SRC})
+
+# Setting external and internal interfaces for ISA-L library
+target_include_directories(_isal
+    PUBLIC ${ISAL_SOURCE_DIR}/include
+    PUBLIC ${ISAL_SOURCE_DIR}/igzip
+    PUBLIC ${ISAL_SOURCE_DIR}/crc
+    PUBLIC ${ISAL_SOURCE_DIR}/erasure_code)
+
+# "-fno-sanitize=undefined" must be removed from the COMPILE_OPTIONS of _isal here;
+# otherwise the NASM assembler fails because it does not recognize "-fno-sanitize=undefined".
+if (SANITIZE STREQUAL "undefined")
+    get_target_property(target_options _isal COMPILE_OPTIONS)
+    list(REMOVE_ITEM target_options "-fno-sanitize=undefined")
+    set_property(TARGET _isal PROPERTY COMPILE_OPTIONS ${target_options})
+endif()
+
+add_library(ch_contrib::isal ALIAS _isal)
diff --git a/contrib/libhdfs3 b/contrib/libhdfs3
index 3c91d96ff29..164b89253fa 160000
--- a/contrib/libhdfs3
+++ b/contrib/libhdfs3
@@ -1 +1 @@
-Subproject commit 3c91d96ff29fe5928f055519c6d979c4b104db9e
+Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d
diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt
index c22cac731fe..d9f7009c1bd 100644
--- a/contrib/libhdfs3-cmake/CMakeLists.txt
+++ b/contrib/libhdfs3-cmake/CMakeLists.txt
@@ -70,6 +70,30 @@ set(SRCS
     "${HDFS3_SOURCE_DIR}/client/Token.cpp"
     "${HDFS3_SOURCE_DIR}/client/PacketPool.cpp"
     "${HDFS3_SOURCE_DIR}/client/OutputStream.cpp"
+    "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawDecoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawEncoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/ByteBufferDecodingState.cpp"
+    "${HDFS3_SOURCE_DIR}/client/ByteBufferEncodingState.cpp"
+    "${HDFS3_SOURCE_DIR}/client/CoderUtil.cpp"
+    "${HDFS3_SOURCE_DIR}/client/ECChunk.cpp"
+    "${HDFS3_SOURCE_DIR}/client/ErasureCoderOptions.cpp"
+    "${HDFS3_SOURCE_DIR}/client/GF256.cpp"
+    "${HDFS3_SOURCE_DIR}/client/GaloisField.cpp"
+    "${HDFS3_SOURCE_DIR}/client/NativeRSRawDecoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/NativeRSRawEncoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/Preconditions.cpp"
+    "${HDFS3_SOURCE_DIR}/client/RSUtil.cpp"
+    "${HDFS3_SOURCE_DIR}/client/RawErasureCoderFactory.cpp"
+    "${HDFS3_SOURCE_DIR}/client/RawErasureDecoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/RawErasureEncoder.cpp"
+    "${HDFS3_SOURCE_DIR}/client/StatefulStripeReader.cpp"
+    "${HDFS3_SOURCE_DIR}/client/StripeReader.cpp"
+    "${HDFS3_SOURCE_DIR}/client/StripedBlockUtil.cpp"
+    "${HDFS3_SOURCE_DIR}/client/StripedInputStreamImpl.cpp"
+    "${HDFS3_SOURCE_DIR}/client/StripedOutputStreamImpl.cpp"
+    "${HDFS3_SOURCE_DIR}/client/SystemECPolicies.cpp"
+    "${HDFS3_SOURCE_DIR}/client/dump.cpp"
+    "${HDFS3_SOURCE_DIR}/client/erasure_coder.cpp"
     "${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp"
     "${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp"
     "${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp"
@@ -148,6 +172,9 @@ if (TARGET OpenSSL::SSL)
     target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
 endif()
 
+target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
+target_compile_definitions(_hdfs3 PRIVATE HADOOP_ISAL_LIBRARY)
+
 add_library(ch_contrib::hdfs ALIAS _hdfs3)
 
 if (ENABLE_CLICKHOUSE_BENCHMARK)