Merge pull request #25146 from oxidecomputer/master

Adds a better way to include binary resources
This commit is contained in:
alexey-milovidov 2021-06-13 16:13:34 +03:00 committed by GitHub
commit a2c6b98b94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 173 additions and 129 deletions

View File

@ -183,24 +183,20 @@ endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
if (OS_LINUX)
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
if (ARCH_AMD64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
elseif (ARCH_AARCH64)
set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
endif ()
else ()
message(FATAL_ERROR "Cannot find objcopy.")
endif ()
find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
if (OBJCOPY_PATH)
message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
else ()
message(FATAL_ERROR "Cannot find objcopy.")
endif ()
if (OS_DARWIN)
set(WHOLE_ARCHIVE -all_load)
set(NO_WHOLE_ARCHIVE -noall_load)
# The `-all_load` flag forces loading of all symbols from all libraries,
# and leads to multiply-defined symbols. This flag allows force loading
# from a _specific_ library, which is what we need.
set(WHOLE_ARCHIVE -force_load)
# The `-noall_load` flag is the default and now obsolete.
set(NO_WHOLE_ARCHIVE "")
else ()
set(WHOLE_ARCHIVE --whole-archive)
set(NO_WHOLE_ARCHIVE --no-whole-archive)

View File

@ -4,23 +4,42 @@
#include <string>
#include <boost/algorithm/string/replace.hpp>
std::string_view getResource(std::string_view name)
{
// Convert the resource file name into the form generated by `ld -r -b binary`.
std::string name_replaced(name);
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
boost::replace_all(name_replaced, "+", "_PLUS_");
/// These are the names that are generated by "ld -r -b binary"
std::string symbol_name_data = "_binary_" + name_replaced + "_start";
std::string symbol_name_size = "_binary_" + name_replaced + "_size";
// In most `dlsym(3)` APIs, one passes the symbol name as it appears via
// something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
// looked up with the string `"_foo"`.
//
// Apple's linker is confusingly different. The NOTES on the man page for
// `dlsym(3)` claim that one looks up the symbol with "the name used in C
// source code". In this example, that would mean using the string `"foo"`.
// This apparently applies even in the case where the symbol did not originate
// from C source, such as the embedded binary resource files used here. So
// the symbol name must not have a leading `_` on Apple platforms. It's not
// clear how this applies to other symbols, such as those which _have_ a leading
// underscore in them by design, many leading underscores, etc.
#if defined OS_DARWIN
std::string prefix = "binary_";
#else
std::string prefix = "_binary_";
#endif
std::string symbol_name_start = prefix + name_replaced + "_start";
std::string symbol_name_end = prefix + name_replaced + "_end";
const void * sym_data = dlsym(RTLD_DEFAULT, symbol_name_data.c_str());
const void * sym_size = dlsym(RTLD_DEFAULT, symbol_name_size.c_str());
const char* sym_start = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
const char* sym_end = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));
if (sym_data && sym_size)
return { static_cast<const char *>(sym_data), unalignedLoad<size_t>(&sym_size) };
if (sym_start && sym_end)
{
auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
return { sym_start, resource_size };
}
return {};
}

76
cmake/embed_binary.cmake Normal file
View File

@ -0,0 +1,76 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
# 2. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
macro(clickhouse_embed_binaries)
set(one_value_args TARGET RESOURCE_DIR)
set(resources RESOURCES)
cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
if (NOT DEFINED EMBED_TARGET)
message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
endif()
if (NOT DEFINED EMBED_RESOURCE_DIR)
set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(LENGTH EMBED_RESOURCES N_RESOURCES)
if (N_RESOURCES LESS 1)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
# If cross-compiling, ensure we use the toolchain file and target the
# actual target architecture
if (CMAKE_CROSSCOMPILING)
set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET} --gcc-toolchain=${TOOLCHAIN_FILE}")
else()
set(CROSS_COMPILE_FLAGS "")
endif()
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
set(RESOURCE_OBJS)
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(RESOURCE_OBJ "${RESOURCE_FILE}.o")
list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}")
# Normalize the name of the resource
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
# Put the configured assembly file in the output directory.
# This is so we can clean it up as usual, and we CD to the
# source directory before compiling, so that the assembly
# `.incbin` directive can find the file.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Generate the output object file by compiling the assembly, in the directory of
# the sources so that the resource file may also be found
add_custom_command(
OUTPUT ${RESOURCE_OBJ}
COMMAND cd "${EMBED_RESOURCE_DIR}" &&
${CMAKE_C_COMPILER} "${CROSS_COMPILE_FLAGS}" -c -o
"${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
"${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
)
set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach()
add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS})
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
endmacro()

View File

@ -4,6 +4,7 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH)
# We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)

View File

@ -39,6 +39,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
endif()
if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(USE_INTERNAL_CCTZ_LIBRARY 1)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
@ -70,63 +71,36 @@ if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
# Build a libray with embedded tzdata
if (OS_LINUX)
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
set(TZ_OBJS)
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';'"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
set(TIMEZONE_RESOURCE_FILES)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
string(REPLACE "/" "_" TIMEZONE_ID ${TIMEZONE})
string(REPLACE "+" "_PLUS_" TIMEZONE_ID ${TIMEZONE_ID})
set(TZ_OBJ ${TIMEZONE_ID}.o)
set(TZ_OBJS ${TZ_OBJS} ${TZ_OBJ})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
# PPC64LE fails to do this with objcopy, use ld or lld instead
if (ARCH_PPC64LE)
add_custom_command(OUTPUT ${TZ_OBJ}
COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o ${TZ_OBJ} ${TIMEZONE_ID}
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
else()
add_custom_command(OUTPUT ${TZ_OBJ}
COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS}
--rename-section .data=.rodata,alloc,load,readonly,data,contents ${TIMEZONE_ID} ${TZ_OBJ}
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
endif()
set_source_files_properties(${TZ_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
add_library(tzdata STATIC ${TZ_OBJS})
set_target_properties(tzdata PROPERTIES LINKER_LANGUAGE C)
# whole-archive prevents symbols from being discarded for unknown reason
# CMake can shuffle each of target_link_libraries arguments with other
# libraries in linker command. To avoid this we hardcode whole-archive
# library into single string.
add_dependencies(cctz tzdata)
target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
else ()
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {nullptr};\n" )
endif ()
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
TARGET tzdata
RESOURCE_DIR "${TZDIR}"
RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(cctz tzdata)
target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
message (STATUS "Using cctz")

View File

@ -204,55 +204,6 @@ macro(clickhouse_program_add name)
clickhouse_program_add_executable(${name})
endmacro()
# Embed default config files as a resource into the binary.
# This is needed for two purposes:
# 1. Allow to run the binary without download of any other files.
# 2. Allow to implement "sudo clickhouse install" tool.
#
# Arguments: target (server, client, keeper, etc.) and list of files
#
# Also dependency on TARGET_FILE is required, look at examples in programs/server and programs/keeper
macro(clickhouse_embed_binaries)
# TODO We actually need this on Mac, FreeBSD.
if (OS_LINUX)
set(arguments_list "${ARGN}")
list(GET arguments_list 0 target)
# for some reason cmake iterates loop including <stop>
math(EXPR arguments_count "${ARGC}-1")
foreach(RESOURCE_POS RANGE 1 "${arguments_count}")
list(GET arguments_list "${RESOURCE_POS}" RESOURCE_FILE)
set(RESOURCE_OBJ ${RESOURCE_FILE}.o)
set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ})
# https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
# PPC64LE fails to do this with objcopy, use ld or lld instead
if (ARCH_PPC64LE)
add_custom_command(OUTPUT ${RESOURCE_OBJ}
COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" ${RESOURCE_FILE})
else()
add_custom_command(OUTPUT ${RESOURCE_OBJ}
COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents
"${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}")
endif()
set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
endforeach()
add_library(clickhouse_${target}_configs STATIC ${RESOURCE_OBJS})
set_target_properties(clickhouse_${target}_configs PROPERTIES LINKER_LANGUAGE C)
# whole-archive prevents symbols from being discarded for unknown reason
# CMake can shuffle each of target_link_libraries arguments with other
# libraries in linker command. To avoid this we hardcode whole-archive
# library into single string.
add_dependencies(clickhouse-${target}-lib clickhouse_${target}_configs)
endif ()
endmacro()
add_subdirectory (server)
add_subdirectory (client)
add_subdirectory (local)

View File

@ -0,0 +1,17 @@
// Embed a binary file into an executable.
// The variable BINARY_FILE_NAME is the actual name of the file to include
// The variable SYMBOL_NAME is the "normalized" name of the symbol, with
// symbols like `-`, `.`, and `/` replaced with `_`. This is to match how
// objcopy rewrites symbol names, and matches the expectation in
// `base/common/getResource.cpp`
.data
.global _binary_@SYMBOL_NAME@_start
_binary_@SYMBOL_NAME@_start:
.incbin "@BINARY_FILE_NAME@"
.global _binary_@SYMBOL_NAME@_end
_binary_@SYMBOL_NAME@_end:
.global _binary_@SYMBOL_NAME@_size
_binary_@SYMBOL_NAME@_size:
.quad _binary_@SYMBOL_NAME@_end - _binary_@SYMBOL_NAME@_start

View File

@ -1,3 +1,5 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp
)
@ -21,4 +23,8 @@ clickhouse_program_add(keeper)
install (FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
clickhouse_embed_binaries(keeper keeper_config.xml keeper_embedded.xml)
clickhouse_embed_binaries(
TARGET clickhouse_keeper_configs
RESOURCES keeper_config.xml keeper_embedded.xml
)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)

View File

@ -1,11 +1,11 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp
Server.cpp
)
if (OS_LINUX)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
set (CLICKHOUSE_SERVER_LINK
PRIVATE
@ -31,4 +31,8 @@ clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
clickhouse_embed_binaries(server config.xml users.xml embedded.xml play.html)
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)