Improve performance of BLAKE3 by 11% by enabling LTO for Rust

LTO in Rust produces multiple definition of `rust_eh_personality' (and
few others), and to overcome this --allow-multiple-definition has been
added.

Query for benchmark:

    SELECT ignore(BLAKE3(materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit'))) FROM numbers(1000000000) FORMAT `Null`

  upstream           : Elapsed: 2.494 sec. Processed 31.13 million rows, 249.08 MB (12.48 million rows/s., 99.86 MB/s.)
  upstream + rust lto: Elapsed: 13.56 sec. Processed 191.9 million rows, 1.5400 GB (14.15 million rows/s., 113.22 MB/s.)
  llvm BLAKE3        : Elapsed: 3.053 sec. Processed 43.24 million rows, 345.88 MB (14.16 million rows/s., 113.28 MB/s.)

Note, I thought about simply replacing it with BLAKE3 from LLVM, but:
- this will not solve LTO issues for Rust (and in future more libraries
  could be added)
- it makes integrating_rust_libraries.md useless (and there is even blog
  post)

So instead I've decided to add this quirk (--allow-multiple-definition)
to fix builds.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-05-06 18:54:56 +02:00
parent 2104baabce
commit 1107988a82
4 changed files with 43 additions and 2 deletions

View File

@ -522,6 +522,26 @@ include (cmake/print_flags.cmake)
if (ENABLE_RUST)
add_subdirectory (rust)
# With LTO Rust adds few symbols with global visiblity, the most common is
# rust_eh_personality. And this leads to linking errors because multiple
# Rust libraries contains the same symbol.
#
# If it was shared library, that we could use version script for linker to
# hide this symbols, but libraries are static.
#
# we could in theory compile everything to one library but this will be a
# mess
#
# But this should be OK since CI has lots of other builds that are done
# without LTO and it will find multiple definitions if there will be any.
#
# More information about this behaviour in Rust can be found here
# - https://github.com/rust-lang/rust/issues/44322
# - https://alanwu.space/post/symbol-hygiene/
if (ENABLE_THINLTO)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition")
endif()
endif()
add_subdirectory (base)

View File

@ -11,3 +11,10 @@ libc = "0.2.132"
[lib]
crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit
lto = true

View File

@ -34,9 +34,18 @@ function(clickhouse_import_crate)
else()
set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug")
endif()
# NOTE: we may use LTO for rust too
corrosion_import_crate(NO_STD ${ARGN})
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set(profile "")
else()
if (ENABLE_THINLTO)
set(profile "release-thinlto")
else()
set(profile "release")
endif()
endif()
corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile})
endfunction()
# Add crate from the build directory.

View File

@ -18,3 +18,8 @@ crate-type = ["staticlib"]
[profile.release]
debug = true
[profile.release-thinlto]
inherits = "release"
# We use LTO here as well to slightly decrease binary size
lto = true