Update HDFS: Support for erasure codes

Signed-off-by: M1eyu2018 <857037797@qq.com>
This commit is contained in:
M1eyu2018 2023-04-17 12:33:39 +08:00
parent 0dab82c420
commit dff3c0c867
6 changed files with 220 additions and 1 deletions

3
.gitmodules vendored
View File

@ -335,3 +335,6 @@
[submodule "contrib/liburing"]
path = contrib/liburing
url = https://github.com/axboe/liburing
[submodule "contrib/isa-l"]
path = contrib/isa-l
url = https://github.com/ClickHouse/isa-l.git

View File

@ -191,6 +191,8 @@ add_contrib (google-benchmark-cmake google-benchmark)
add_contrib (ulid-c-cmake ulid-c)
# ISA-L: provides the ch_contrib::isal target that libhdfs3 links against for erasure-code support.
add_contrib (isa-l-cmake isa-l)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned, so we work around this by fixing FOLDER properties of all targets manually,

1
contrib/isa-l vendored Submodule

@ -0,0 +1 @@
Subproject commit 9f2b68f05752097f0f16632fc4a9a86950831efd

View File

@ -0,0 +1,186 @@
# Locate the ISA-L sources and make sure a NASM assembler is available
# before the ASM_NASM language is enabled for this directory.
include(CheckLanguage)
set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l")

# check_language() leaves CMAKE_ASM_NASM_COMPILER unset when no usable assembler is found.
check_language(ASM_NASM)
if(CMAKE_ASM_NASM_COMPILER)
    enable_language(ASM_NASM)
else()
    # ISA-L's .asm sources cannot be built without NASM, so stop the configure step here.
    message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif()
# C sources compiled into the ISA-L library target, grouped by ISA-L component.
#
# programs/igzip_cli.c is intentionally NOT listed: it lives under programs/ and is
# the stand-alone command-line front end, not library code, so compiling it into the
# library would add a main() entry point to it.
#
# NOTE(review): erasure_code/gen_rs_matrix_limits.c, erasure_code/gf_vect_dot_prod_1tbl.c,
# igzip/generate_custom_hufftables.c and igzip/generate_static_inflate.c look like
# stand-alone generator/benchmark tools as well (presumably each has its own main());
# verify against upstream's library source list and drop them too if so.
set(ISAL_C_SRC
    # crc
    "${ISAL_SOURCE_DIR}/crc/crc_base_aliases.c"
    "${ISAL_SOURCE_DIR}/crc/crc_base.c"
    "${ISAL_SOURCE_DIR}/crc/crc64_base.c"
    # erasure_code
    "${ISAL_SOURCE_DIR}/erasure_code/ec_base.c"
    "${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c"
    "${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c"
    "${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c"
    # igzip
    "${ISAL_SOURCE_DIR}/igzip/adler32_base.c"
    "${ISAL_SOURCE_DIR}/igzip/encode_df.c"
    "${ISAL_SOURCE_DIR}/igzip/flatten_ll.c"
    "${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c"
    "${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c"
    "${ISAL_SOURCE_DIR}/igzip/huff_codes.c"
    "${ISAL_SOURCE_DIR}/igzip/hufftables_c.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip_base.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip_icf_base.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip_icf_body.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip_inflate.c"
    "${ISAL_SOURCE_DIR}/igzip/igzip.c"
    # mem
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c"
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c"
    # raid
    "${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c"
    "${ISAL_SOURCE_DIR}/raid/raid_base.c"
)
# NASM sources of ISA-L, grouped by component. The order of entries is kept as is.
set(ISAL_ASM_SRC
    # crc
    "${ISAL_SOURCE_DIR}/crc/crc_multibinary.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_01.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_02.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by4.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4_02.asm"
    "${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8_02.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_ieee_01.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_ieee_02.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_ieee_by4.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_ieee_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_iscsi_00.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_iscsi_01.asm"
    "${ISAL_SOURCE_DIR}/crc/crc32_iscsi_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by8.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by16_10.asm"
    "${ISAL_SOURCE_DIR}/crc/crc64_multibinary.asm"
    # erasure_code
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/ec_multibinary.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx2.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx512.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_sse.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_avx.asm"
    "${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_sse.asm"
    # igzip
    "${ISAL_SOURCE_DIR}/igzip/adler32_avx2_4.asm"
    "${ISAL_SOURCE_DIR}/igzip/adler32_sse.asm"
    "${ISAL_SOURCE_DIR}/igzip/bitbuf2.asm"
    "${ISAL_SOURCE_DIR}/igzip/encode_df_04.asm"
    "${ISAL_SOURCE_DIR}/igzip/encode_df_06.asm"
    "${ISAL_SOURCE_DIR}/igzip/heap_macros.asm"
    "${ISAL_SOURCE_DIR}/igzip/huffman.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_body.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_compare_types.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_01.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_04.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_deflate_hash.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_finish.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_04.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_06.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_icf_body_h1_gr_bt.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_icf_finish.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_inflate_multibinary.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_multibinary.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_04.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_06.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_01.asm"
    "${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_04.asm"
    "${ISAL_SOURCE_DIR}/igzip/lz0a_const.asm"
    "${ISAL_SOURCE_DIR}/igzip/options.asm"
    "${ISAL_SOURCE_DIR}/igzip/proc_heap.asm"
    "${ISAL_SOURCE_DIR}/igzip/rfc1951_lookup.asm"
    "${ISAL_SOURCE_DIR}/igzip/stdmac.asm"
    # mem
    "${ISAL_SOURCE_DIR}/mem/mem_multibinary.asm"
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx.asm"
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx2.asm"
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx512.asm"
    "${ISAL_SOURCE_DIR}/mem/mem_zero_detect_sse.asm"
    # raid
    "${ISAL_SOURCE_DIR}/raid/pq_check_sse.asm"
    "${ISAL_SOURCE_DIR}/raid/pq_gen_avx.asm"
    "${ISAL_SOURCE_DIR}/raid/pq_gen_avx2.asm"
    "${ISAL_SOURCE_DIR}/raid/pq_gen_avx512.asm"
    "${ISAL_SOURCE_DIR}/raid/pq_gen_sse.asm"
    "${ISAL_SOURCE_DIR}/raid/raid_multibinary.asm"
    "${ISAL_SOURCE_DIR}/raid/xor_check_sse.asm"
    "${ISAL_SOURCE_DIR}/raid/xor_gen_avx.asm"
    "${ISAL_SOURCE_DIR}/raid/xor_gen_avx512.asm"
    "${ISAL_SOURCE_DIR}/raid/xor_gen_sse.asm"
)
# ISA-L library target, built from the C and NASM sources listed above.
add_library(_isal ${ISAL_C_SRC} ${ISAL_ASM_SRC})

# Public headers live in include/; the igzip, crc and erasure_code directories hold
# headers that the library's own sources include. All of them are exported as PUBLIC.
target_include_directories(_isal
    PUBLIC "${ISAL_SOURCE_DIR}/include"
    PUBLIC "${ISAL_SOURCE_DIR}/igzip"
    PUBLIC "${ISAL_SOURCE_DIR}/crc"
    PUBLIC "${ISAL_SOURCE_DIR}/erasure_code")

# "-fno-sanitize=undefined" must be removed from the target's COMPILE_OPTIONS:
# the options apply to every source of the target, including the .asm files, and
# NASM fails on the flag because it does not recognize it.
# Note that this drops the flag for the C sources of this target as well.
if ("${SANITIZE}" STREQUAL "undefined")
    get_target_property(target_options _isal COMPILE_OPTIONS)
    # get_target_property() stores "<variable>-NOTFOUND" when the property is not set.
    # Writing that value back would hand a bogus "...-NOTFOUND" option to the compilers,
    # so the property is only rewritten when it actually exists.
    if (NOT "${target_options}" STREQUAL "target_options-NOTFOUND")
        list(REMOVE_ITEM target_options "-fno-sanitize=undefined")
        set_property(TARGET _isal PROPERTY COMPILE_OPTIONS ${target_options})
    endif()
endif()

# Namespaced alias used by consumers.
add_library(ch_contrib::isal ALIAS _isal)

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 3c91d96ff29fe5928f055519c6d979c4b104db9e
Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d

View File

@ -70,6 +70,30 @@ set(SRCS
"${HDFS3_SOURCE_DIR}/client/Token.cpp"
"${HDFS3_SOURCE_DIR}/client/PacketPool.cpp"
"${HDFS3_SOURCE_DIR}/client/OutputStream.cpp"
"${HDFS3_SOURCE_DIR}/client/AbstractNativeRawDecoder.cpp"
"${HDFS3_SOURCE_DIR}/client/AbstractNativeRawEncoder.cpp"
"${HDFS3_SOURCE_DIR}/client/ByteBufferDecodingState.cpp"
"${HDFS3_SOURCE_DIR}/client/ByteBufferEncodingState.cpp"
"${HDFS3_SOURCE_DIR}/client/CoderUtil.cpp"
"${HDFS3_SOURCE_DIR}/client/ECChunk.cpp"
"${HDFS3_SOURCE_DIR}/client/ErasureCoderOptions.cpp"
"${HDFS3_SOURCE_DIR}/client/GF256.cpp"
"${HDFS3_SOURCE_DIR}/client/GaloisField.cpp"
"${HDFS3_SOURCE_DIR}/client/NativeRSRawDecoder.cpp"
"${HDFS3_SOURCE_DIR}/client/NativeRSRawEncoder.cpp"
"${HDFS3_SOURCE_DIR}/client/Preconditions.cpp"
"${HDFS3_SOURCE_DIR}/client/RSUtil.cpp"
"${HDFS3_SOURCE_DIR}/client/RawErasureCoderFactory.cpp"
"${HDFS3_SOURCE_DIR}/client/RawErasureDecoder.cpp"
"${HDFS3_SOURCE_DIR}/client/RawErasureEncoder.cpp"
"${HDFS3_SOURCE_DIR}/client/StatefulStripeReader.cpp"
"${HDFS3_SOURCE_DIR}/client/StripeReader.cpp"
"${HDFS3_SOURCE_DIR}/client/StripedBlockUtil.cpp"
"${HDFS3_SOURCE_DIR}/client/StripedInputStreamImpl.cpp"
"${HDFS3_SOURCE_DIR}/client/StripedOutputStreamImpl.cpp"
"${HDFS3_SOURCE_DIR}/client/SystemECPolicies.cpp"
"${HDFS3_SOURCE_DIR}/client/dump.cpp"
"${HDFS3_SOURCE_DIR}/client/erasure_coder.cpp"
"${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp"
"${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp"
"${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp"
@ -148,6 +172,9 @@ if (TARGET OpenSSL::SSL)
target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
endif()
# Erasure-code support: link ISA-L into libhdfs3 and define HADOOP_ISAL_LIBRARY for it.
target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal)
# Scoped to the _hdfs3 target instead of add_definitions(), which would apply the macro
# to every target in this directory.
# NOTE(review): PRIVATE assumes only _hdfs3's own sources test HADOOP_ISAL_LIBRARY — confirm
# that no other target in this directory relies on it.
target_compile_definitions(_hdfs3 PRIVATE HADOOP_ISAL_LIBRARY)
add_library(ch_contrib::hdfs ALIAS _hdfs3)
if (ENABLE_CLICKHOUSE_BENCHMARK)