From 1107988a82a84f3f68d8db835ac5168d1b36e8b4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 6 May 2023 18:54:56 +0200 Subject: [PATCH] Improve performance of BLAKE3 by 11% by enabling LTO for Rust LTO in Rust produces multiple definition of `rust_eh_personality' (and few others), and to overcome this --allow-multiple-definition has been added. Query for benchmark: SELECT ignore(BLAKE3(materialize('Lorem ipsum dolor sit amet, consectetur adipiscing elit'))) FROM numbers(1000000000) FORMAT `Null` upstream : Elapsed: 2.494 sec. Processed 31.13 million rows, 249.08 MB (12.48 million rows/s., 99.86 MB/s.) upstream + rust lto: Elapsed: 13.56 sec. Processed 191.9 million rows, 1.5400 GB (14.15 million rows/s., 113.22 MB/s.) llvm BLAKE3 : Elapsed: 3.053 sec. Processed 43.24 million rows, 345.88 MB (14.16 million rows/s., 113.28 MB/s.) Note, I thought about simply replacing it with BLAKE3 from LLVM, but: - this will not solve LTO issues for Rust (and in future more libraries could be added) - it makes integrating_rust_libraries.md useless (and there is even blog post) So instead I've decided to add this quirk (--allow-multiple-definition) to fix builds. Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 20 ++++++++++++++++++++ rust/BLAKE3/Cargo.toml | 7 +++++++ rust/CMakeLists.txt | 13 +++++++++++-- rust/skim/Cargo.toml | 5 +++++ 4 files changed, 43 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 263b202049b..31f58b399db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -522,6 +522,26 @@ include (cmake/print_flags.cmake) if (ENABLE_RUST) add_subdirectory (rust) + + # With LTO Rust adds few symbols with global visiblity, the most common is + # rust_eh_personality. And this leads to linking errors because multiple + # Rust libraries contains the same symbol. + # + # If it was shared library, that we could use version script for linker to + # hide this symbols, but libraries are static. + # + # we could in theory compile everything to one library but this will be a + # mess + # + # But this should be OK since CI has lots of other builds that are done + # without LTO and it will find multiple definitions if there will be any. + # + # More information about this behaviour in Rust can be found here + # - https://github.com/rust-lang/rust/issues/44322 + # - https://alanwu.space/post/symbol-hygiene/ + if (ENABLE_THINLTO) + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--allow-multiple-definition") + endif() endif() add_subdirectory (base) diff --git a/rust/BLAKE3/Cargo.toml b/rust/BLAKE3/Cargo.toml index eb8f3467424..ed414fa54c1 100644 --- a/rust/BLAKE3/Cargo.toml +++ b/rust/BLAKE3/Cargo.toml @@ -11,3 +11,10 @@ libc = "0.2.132" [lib] crate-type = ["staticlib"] +[profile.release] +debug = true + +[profile.release-thinlto] +inherits = "release" +# BLAKE3 module requires "full" LTO (not "thin") to get additional 10% performance benefit +lto = true diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index ec2377fce71..d229894791a 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -34,9 +34,18 @@ function(clickhouse_import_crate) else() set(CMAKE_CONFIGURATION_TYPES "${CMAKE_BUILD_TYPE};debug") endif() - # NOTE: we may use LTO for rust too - corrosion_import_crate(NO_STD ${ARGN}) + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set(profile "") + else() + if (ENABLE_THINLTO) + set(profile "release-thinlto") + else() + set(profile "release") + endif() + endif() + + corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile}) endfunction() # Add crate from the build directory. diff --git a/rust/skim/Cargo.toml b/rust/skim/Cargo.toml index ce137221065..e5801a26f77 100644 --- a/rust/skim/Cargo.toml +++ b/rust/skim/Cargo.toml @@ -18,3 +18,8 @@ crate-type = ["staticlib"] [profile.release] debug = true + +[profile.release-thinlto] +inherits = "release" +# We use LTO here as well to slightly decrease binary size +lto = true