Merge branch 'incbin' into no-export-dynamic

This commit is contained in:
Alexey Milovidov 2023-07-23 21:34:12 +02:00
commit 63659c55a1
37 changed files with 352 additions and 465 deletions

3
.gitmodules vendored
View File

@ -340,3 +340,6 @@
[submodule "contrib/c-ares"] [submodule "contrib/c-ares"]
path = contrib/c-ares path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git

View File

@ -1,58 +0,0 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
#   1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
#   2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
#   3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
#
# Fatal errors are raised when TARGET is missing or when RESOURCES is empty.
#
# NOTE: this is a macro, so every variable set below (EMBED_*, ASSEMBLY_FILE_NAME, BINARY_FILE_NAME,
# SYMBOL_NAME, ...) is visible in the caller's scope after the call.
macro(clickhouse_embed_binaries)
    set(one_value_args TARGET RESOURCE_DIR)
    set(resources RESOURCES)
    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})

    if (NOT DEFINED EMBED_TARGET)
        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
    endif()

    if (NOT DEFINED EMBED_RESOURCE_DIR)
        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()

    list(LENGTH EMBED_RESOURCES N_RESOURCES)
    if (N_RESOURCES LESS 1)
        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
    endif()

    add_library("${EMBED_TARGET}" STATIC)
    # The target only contains assembly sources, so the linker language has to be set explicitly.
    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)

    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")

    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
        set(BINARY_FILE_NAME "${RESOURCE_FILE}")

        # Normalize the name of the resource.
        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")

        # Generate the configured assembly file in the output directory.
        # (The template presumably consumes SYMBOL_NAME and BINARY_FILE_NAME - it is not part of this file.)
        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)

        # Set the include directory for relative paths specified for `.incbin` directive.
        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")

        target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")

        # Re-assemble the object whenever the embedded resource changes.
        # OBJECT_DEPENDS is a *source file* property that expects full paths. Setting it on the target
        # (as was done before) is ignored by CMake, and each loop iteration overwrote the previous value.
        get_filename_component(EMBED_RESOURCE_FULL_PATH "${EMBED_RESOURCE_DIR}/${RESOURCE_FILE}" ABSOLUTE)
        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY OBJECT_DEPENDS "${EMBED_RESOURCE_FULL_PATH}")
    endforeach()
endmacro()

View File

@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft) add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float) add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP) if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c) add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2) add_contrib (cld2-cmake cld2)
endif() endif()

View File

@ -1,4 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
set (SRCS set (SRCS
@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif () endif ()
# Related to time_zones table: # Related to time_zones table:
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build # TimeZones.generated.cpp is autogenerated each time during a build
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# as the library that's built using embedded tzdata is also specific to OS_LINUX
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build. # remove existing copies so that its generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) file(REMOVE ${TIMEZONES_FILE})
# get the list of timezones from tzdata shipped with cctz # get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@ -36,28 +33,45 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
set(TIMEZONE_RESOURCE_FILES)
# each file in that dir (except of tab and localtime) store the info about timezone # each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES) OUTPUT_VARIABLE TIMEZONES)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
foreach(TIMEZONE ${TIMEZONES}) foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE) endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries( file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
TARGET tzdata
RESOURCE_DIR "${TZDIR}" file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
RESOURCES ${TIMEZONE_RESOURCE_FILES} file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
)
add_dependencies(_cctz tzdata) set (COUNTER 1)
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}") foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} " return {};\n")
file(APPEND ${TIMEZONES_FILE} "}\n")
add_library (tzdata ${TIMEZONES_FILE})
target_link_libraries(tzdata ch_contrib::incbin)
target_include_directories(tzdata PRIVATE ${TZDIR})
target_link_libraries(_cctz tzdata)
add_library(ch_contrib::cctz ALIAS _cctz) add_library(ch_contrib::cctz ALIAS _cctz)

1
contrib/incbin vendored Submodule

@ -0,0 +1 @@
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf

View File

@ -0,0 +1,4 @@
# incbin is consumed as headers only: an INTERFACE target that adds the
# submodule directory to the (system) include path of whoever links to it.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")

add_library(_incbin INTERFACE)
target_include_directories(
    _incbin
    SYSTEM INTERFACE
    ${LIBRARY_DIR}
)

# Namespaced alias used by the rest of the build.
add_library(ch_contrib::incbin ALIAS _incbin)

View File

@ -1,15 +0,0 @@
# Embeds the NLP dictionaries from contrib/nlp-data into a static library and
# exposes them through the interface target ch_contrib::nlp_data.
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)

set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")

add_library (_nlp_data INTERFACE)

# Static library `nlp_dictionaries` holding the three compressed dictionaries.
clickhouse_embed_binaries(
    TARGET nlp_dictionaries
    RESOURCE_DIR "${LIBRARY_DIR}"
    RESOURCES
        charset.zst
        tonality_ru.zst
        programming.zst
)

# Build the archive before anything that uses _nlp_data, and link it as a whole
# archive so that consumers pull in every object of it.
add_dependencies(_nlp_data nlp_dictionaries)
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")

add_library(ch_contrib::nlp_data ALIAS _nlp_data)

View File

@ -147,6 +147,7 @@ function clone_submodules
contrib/simdjson contrib/simdjson
contrib/liburing contrib/liburing
contrib/libfiu contrib/libfiu
contrib/incbin
) )
git submodule sync git submodule sync

View File

@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK
) )
clickhouse_program_add_library(install) clickhouse_program_add_library(install)
# For incbin
target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")

View File

@ -20,10 +20,7 @@
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h> #include <Common/OpenSSLHelpers.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h> #include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h> #include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
@ -35,6 +32,12 @@
#include <Poco/Util/XMLConfiguration.h> #include <Poco/Util/XMLConfiguration.h>
#include <incbin.h>
/// Embedded configuration files used inside the install program
INCBIN(resource_config_xml, "config.xml");
INCBIN(resource_users_xml, "users.xml");
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init. * It also allows to avoid dependency on systemd, upstart, SysV init.
@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file)) if (!fs::exists(main_config_file))
{ {
std::string_view main_config_content = getResource("config.xml"); std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty()) if (main_config_content.empty())
{ {
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file)) if (!fs::exists(users_config_file))
{ {
std::string_view users_config_content = getResource("users.xml"); std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty()) if (users_config_content.empty())
{ {
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());

View File

@ -1,16 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
if (OS_LINUX)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
# for some reason INTERFACE linkage doesn't work for standalone binary
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
clickhouse_embed_binaries(
TARGET clickhouse_keeper_configs
RESOURCES keeper_config.xml keeper_embedded.xml
)
set(CLICKHOUSE_KEEPER_SOURCES set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp Keeper.cpp
) )
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper) clickhouse_program_add(keeper)
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER) if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources # Straight list of all required sources
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
) )
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS) if (SPLIT_DEBUG_SYMBOLS)

View File

@ -457,8 +457,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path}; std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path); if (!cert_path.empty())
if (!key_path.empty()) extra_paths.emplace_back(key_path); extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
/// ConfigReloader have to strict parameters which are redundant in our case /// ConfigReloader have to strict parameters which are redundant in our case
auto main_config_reloader = std::make_unique<ConfigReloader>( auto main_config_reloader = std::make_unique<ConfigReloader>(

View File

@ -1,12 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_SERVER_SOURCES set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp MetricsTransmitter.cpp
Server.cpp Server.cpp
) )
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
set (CLICKHOUSE_SERVER_LINK set (CLICKHOUSE_SERVER_LINK
PRIVATE PRIVATE
clickhouse_aggregate_functions clickhouse_aggregate_functions
@ -33,10 +29,4 @@ endif()
clickhouse_program_add(server) clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)

View File

@ -128,6 +128,10 @@
# include <azure/storage/common/internal/xml_wrapper.hpp> # include <azure/storage/common/internal/xml_wrapper.hpp>
#endif #endif
#include <incbin.h>
/// A minimal file used when the server is run without installation
INCBIN(resource_embedded_xml, "embedded.xml");
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric Revision; extern const Metric Revision;
@ -393,6 +397,7 @@ int Server::run()
void Server::initialize(Poco::Util::Application & self) void Server::initialize(Poco::Util::Application & self)
{ {
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
BaseDaemon::initialize(self); BaseDaemon::initialize(self);
logger().information("starting up"); logger().information("starting up");
@ -1105,8 +1110,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path}; std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path); if (!cert_path.empty())
if (!key_path.empty()) extra_paths.emplace_back(key_path); extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
auto main_config_reloader = std::make_unique<ConfigReloader>( auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path, config_path,

View File

View File

@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif() endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common) add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper) add_subdirectory(Common/ZooKeeper)
@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
endforeach () endforeach ()
endmacro () endmacro ()
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
if (TARGET ch_contrib::llvm) if (TARGET ch_contrib::llvm)
@ -561,7 +561,7 @@ if (ENABLE_NLP)
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
endif() endif()
if (TARGET ch_contrib::ulid) if (TARGET ch_contrib::ulid)

View File

@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
endif() endif()
if (ENABLE_MYSQL) if (ENABLE_MYSQL)
add_subdirectory (mysqlxx) add_subdirectory(mysqlxx)
endif () endif ()

View File

@ -19,7 +19,6 @@
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/getResource.h>
#include <Common/XMLUtils.h> #include <Common/XMLUtils.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <base/errnoToString.h> #include <base/errnoToString.h>
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
Poco::Logger::destroy("ConfigProcessor"); Poco::Logger::destroy("ConfigProcessor");
} }
static std::unordered_map<std::string, std::string_view> embedded_configs;
void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
{
embedded_configs[name] = content;
}
/// Vector containing the name of the element and a sorted list of attribute names and values /// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes). /// (except "remove" and "replace" attributes).
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
{ {
std::string value = node->nodeValue(); std::string value = node->nodeValue();
bool replace_occured = false; bool replace_occurred = false;
size_t pos; size_t pos;
while ((pos = value.find(substitution.first)) != std::string::npos) while ((pos = value.find(substitution.first)) != std::string::npos)
{ {
value.replace(pos, substitution.first.length(), substitution.second); value.replace(pos, substitution.first.length(), substitution.second);
replace_occured = true; replace_occurred = true;
} }
if (replace_occured) if (replace_occurred)
node->setNodeValue(value); node->setNodeValue(value);
} }
} }
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
} }
else else
{ {
/// These embedded files added during build with some cmake magic. /// When we can use a config embedded in the binary.
/// Look at the end of programs/server/CMakeLists.txt. if (auto it = embedded_configs.find(path); it != embedded_configs.end())
std::string embedded_name;
if (path == "config.xml")
embedded_name = "embedded.xml";
if (path == "keeper_config.xml")
embedded_name = "keeper_embedded.xml";
/// When we can use config embedded in binary.
if (!embedded_name.empty())
{ {
auto resource = getResource(embedded_name);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path); LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
config = dom_parser.parseMemory(resource.data(), resource.size()); config = dom_parser.parseMemory(it->second.data(), it->second.size());
} }
else else
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path); throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
} }
std::vector<std::string> contributing_files; std::vector<std::string> contributing_files;

View File

@ -65,6 +65,9 @@ public:
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr, zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
const zkutil::EventPtr & zk_changed_event = nullptr); const zkutil::EventPtr & zk_changed_event = nullptr);
/// These configurations will be used if there is no configuration file.
static void registerEmbeddedConfig(std::string name, std::string_view content);
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration. /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
/// The resulting XML document is saved into a file with the name /// The resulting XML document is saved into a file with the name

View File

@ -3,7 +3,6 @@
#include <cctz/civil_time.h> #include <cctz/civil_time.h>
#include <cctz/time_zone.h> #include <cctz/time_zone.h>
#include <cctz/zone_info_source.h> #include <cctz/zone_info_source.h>
#include <Common/getResource.h>
#include <Poco/Exception.h> #include <Poco/Exception.h>
#include <algorithm> #include <algorithm>
@ -11,6 +10,11 @@
#include <chrono> #include <chrono>
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <iostream>
/// Embedded timezones.
std::string_view getTimeZone(const char * name);
namespace namespace
@ -249,9 +253,10 @@ namespace cctz_extension
const std::string & name, const std::string & name,
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback) const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
{ {
std::string_view resource = getResource(name); std::string_view tz_file = getTimeZone(name.data());
if (!resource.empty())
return std::make_unique<Source>(resource.data(), resource.size()); if (!tz_file.empty())
return std::make_unique<Source>(tz_file.data(), tz_file.size());
return fallback(name); return fallback(name);
} }

View File

@ -0,0 +1,185 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <incbin.h>
/// Embedded NLP dictionaries (zstd-compressed)
INCBIN(resource_charset_zst, "charset.zst");
INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
INCBIN(resource_programming_zst, "programming.zst");
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
/// Returns the single shared FrequencyHolder.
/// The object is a function-local static, so it is constructed on the first call
/// and the same instance is returned by every later call.
FrequencyHolder & FrequencyHolder::getInstance()
{
    static FrequencyHolder holder;
    return holder;
}
/// Loads all embedded dictionaries at construction time, in this order:
/// emotional dictionary, charset frequencies, programming language frequencies.
FrequencyHolder::FrequencyHolder()
{
    loadEmotionalDict();
    loadEncodingsFrequency();
    loadProgrammingFrequency();
}
void FrequencyHolder::loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void FrequencyHolder::loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void FrequencyHolder::loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
}
#endif

View File

@ -1,5 +1,9 @@
#pragma once #pragma once
#include "config.h"
#if USE_NLP
#include <base/StringRef.h> #include <base/StringRef.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
@ -7,7 +11,6 @@
#include <unordered_map> #include <unordered_map>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/getResource.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
@ -20,11 +23,6 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
/// FrequencyHolder class is responsible for storing and loading dictionaries /// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions: /// needed for text classification functions:
/// ///
@ -56,11 +54,7 @@ public:
using EncodingMap = HashMap<UInt16, Float64>; using EncodingMap = HashMap<UInt16, Float64>;
using EncodingContainer = std::vector<Encoding>; using EncodingContainer = std::vector<Encoding>;
static FrequencyHolder & getInstance() static FrequencyHolder & getInstance();
{
static FrequencyHolder instance;
return instance;
}
const Map & getEmotionalDict() const const Map & getEmotionalDict() const
{ {
@ -78,161 +72,11 @@ public:
} }
private: private:
FrequencyHolder();
FrequencyHolder() void loadEncodingsFrequency();
{ void loadEmotionalDict();
loadEmotionalDict(); void loadProgrammingFrequency();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
auto resource = getResource("charset.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
auto resource = getResource("tonality_ru.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
/// Fills `programming_freq` from the embedded "programming.zst" resource, which is read through a
/// ZstdInflatingReadBuffer and processed line by line:
///  - a line starting with "// " opens a new entry; the rest of the line is the language name;
///  - any other non-empty line carries a bigram and its frequency and is stored in the entry opened last.
/// Bigram bytes are copied into `string_pool`; the map keys refer to those copies.
///
/// Throws Exception(FILE_DOESNT_EXIST) if the resource is not embedded into the binary.
void loadProgrammingFrequency()
{
    Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");

    LOG_TRACE(log, "Loading embedded programming languages frequencies loading");

    auto resource = getResource("programming.zst");
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");

    String line;
    String bigram;
    Float64 frequency;
    String programming_language;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        readString(line, in);
        /// NOTE(review): presumably consumes the line terminator left behind by readString - confirm.
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        // Start loading a new language
        if (line.starts_with("// "))
        {
            // Skip "// "
            buf_line.ignore(3);
            readString(programming_language, buf_line);

            Language lang;
            lang.name = programming_language;
            programming_freq.push_back(std::move(lang));
        }
        else if (programming_freq.empty())
        {
            /// A frequency line that precedes the first "// " header has no language to belong to.
            /// Calling back() on an empty container is undefined behaviour, so the line is skipped.
            LOG_TRACE(log, "Skipping programming language frequency line that precedes any language header");
        }
        else
        {
            readStringUntilWhitespace(bigram, buf_line);
            /// NOTE(review): presumably skips the single separator between bigram and frequency - confirm.
            buf_line.ignore();
            readFloatText(frequency, buf_line);

            /// The key must refer to the copy held by the pool, because `bigram` is overwritten on the next line.
            StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
            programming_freq.back().map[ref] = frequency;
        }
    }

    LOG_TRACE(log, "Programming languages frequencies was added");
}
Arena string_pool; Arena string_pool;
@ -241,3 +85,5 @@ private:
EncodingContainer encodings_freq; EncodingContainer encodings_freq;
}; };
} }
#endif

View File

@ -88,50 +88,13 @@ namespace
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
/// Records one boundary of an embedded resource if `name` is a boundary symbol.
///
/// Boundary symbols have the form [_]binary_<resource>_start or [_]binary_<resource>_end.
/// For such a symbol the entry `resources[<resource>]` is created if needed, and - unless the entry
/// already belongs to an object with a different base address - its `start` or `end` is set to an
/// empty string_view positioned at `address`, together with `base_address` and `object_name`.
/// Any other symbol name is ignored.
///
/// The map key is a view into the storage behind `name`; no copy of the characters is made here.
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
{
    /// Strip the prefix first, so that the suffix is only ever matched against what remains.
    /// This keeps a name such as "_binary_start" (prefix and suffix overlapping) from being
    /// accepted, which previously made the suffix length underflow.
    if (name.starts_with("_binary_"))
        name.remove_prefix(strlen("_binary_"));
    else if (name.starts_with("binary_"))
        name.remove_prefix(strlen("binary_"));
    else
        return;

    /// A symbol is either a start or an end boundary, never both. Testing for "_end" again after
    /// "_start" was stripped (as in "_binary_foo_end_start") used to register a bogus second boundary.
    const bool is_start = name.ends_with("_start");
    const bool is_end = !is_start && name.ends_with("_end");
    if (!is_start && !is_end)
        return;

    name.remove_suffix(is_start ? strlen("_start") : strlen("_end"));

    auto & resource = resources[name];

    /// The same symbol may be present in several objects: only the first object seen owns the entry.
    if (resource.base_address && resource.base_address != base_address)
        return;

    const char * char_address = static_cast<const char *>(address);
    const std::string_view boundary{char_address, 0}; // NOLINT(bugprone-string-constructor)

    resource.base_address = base_address;
    if (is_start)
        resource.start = boundary;
    else
        resource.end = boundary;
    resource.object_name = object_name;
}
/// Based on the code of musl-libc and the answer of Kanalpiroge on /// Based on the code of musl-libc and the answer of Kanalpiroge on
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
/// It does not extract all the symbols (but only public - exported and used for dynamic linking), /// It does not extract all the symbols (but only public - exported and used for dynamic linking),
/// but will work if we cannot find or parse ELF files. /// but will work if we cannot find or parse ELF files.
void collectSymbolsFromProgramHeaders( void collectSymbolsFromProgramHeaders(
dl_phdr_info * info, dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols, std::vector<SymbolIndex::Symbol> & symbols)
SymbolIndex::Resources & resources)
{ {
/* Iterate over all headers of the current shared lib /* Iterate over all headers of the current shared lib
* (first call is for the executable itself) * (first call is for the executable itself)
@ -265,9 +228,6 @@ void collectSymbolsFromProgramHeaders(
/// We are not interested in empty symbols. /// We are not interested in empty symbols.
if (elf_sym[sym_index].st_size) if (elf_sym[sym_index].st_size)
symbols.push_back(symbol); symbols.push_back(symbol);
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
} }
break; break;
@ -303,8 +263,7 @@ void collectSymbolsFromELFSymbolTable(
const Elf & elf, const Elf & elf,
const Elf::Section & symbol_table, const Elf::Section & symbol_table,
const Elf::Section & string_table, const Elf::Section & string_table,
std::vector<SymbolIndex::Symbol> & symbols, std::vector<SymbolIndex::Symbol> & symbols)
SymbolIndex::Resources & resources)
{ {
/// Iterate symbol table. /// Iterate symbol table.
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin()); const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
@ -334,8 +293,6 @@ void collectSymbolsFromELFSymbolTable(
if (symbol_table_entry->st_size) if (symbol_table_entry->st_size)
symbols.push_back(symbol); symbols.push_back(symbol);
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
} }
} }
@ -345,8 +302,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
const Elf & elf, const Elf & elf,
unsigned section_header_type, unsigned section_header_type,
const char * string_table_name, const char * string_table_name,
std::vector<SymbolIndex::Symbol> & symbols, std::vector<SymbolIndex::Symbol> & symbols)
SymbolIndex::Resources & resources)
{ {
std::optional<Elf::Section> symbol_table; std::optional<Elf::Section> symbol_table;
std::optional<Elf::Section> string_table; std::optional<Elf::Section> string_table;
@ -364,7 +320,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
return false; return false;
} }
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
return true; return true;
} }
@ -373,7 +329,6 @@ void collectSymbolsFromELF(
dl_phdr_info * info, dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols, std::vector<SymbolIndex::Symbol> & symbols,
std::vector<SymbolIndex::Object> & objects, std::vector<SymbolIndex::Object> & objects,
SymbolIndex::Resources & resources,
String & build_id) String & build_id)
{ {
String object_name; String object_name;
@ -485,11 +440,11 @@ void collectSymbolsFromELF(
object.name = object_name; object.name = object_name;
objects.push_back(std::move(object)); objects.push_back(std::move(object));
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources); searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
/// Unneeded if they were parsed from "program headers" of loaded objects. /// Unneeded if they were parsed from "program headers" of loaded objects.
#if defined USE_MUSL #if defined USE_MUSL
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources); searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
#endif #endif
} }
@ -502,8 +457,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
{ {
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr); SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); collectSymbolsFromProgramHeaders(info, data.symbols);
collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
/* Continue iterations */ /* Continue iterations */
return 0; return 0;

View File

@ -8,6 +8,7 @@
#include <Common/Elf.h> #include <Common/Elf.h>
#include <boost/noncopyable.hpp> #include <boost/noncopyable.hpp>
namespace DB namespace DB
{ {
@ -45,44 +46,15 @@ public:
const std::vector<Symbol> & symbols() const { return data.symbols; } const std::vector<Symbol> & symbols() const { return data.symbols; }
const std::vector<Object> & objects() const { return data.objects; } const std::vector<Object> & objects() const { return data.objects; }
/// Returns the bytes of the embedded resource registered under `name`,
/// or an empty view when no such resource is known.
std::string_view getResource(String name) const
{
    const auto found = data.resources.find(name);
    if (found == data.resources.end())
        return {};
    return found->second.data();
}
/// The BuildID that is generated by compiler. /// The BuildID that is generated by compiler.
String getBuildID() const { return data.build_id; } String getBuildID() const { return data.build_id; }
String getBuildIDHex() const; String getBuildIDHex() const;
/// Boundaries of one embedded resource, together with the object they were found in.
struct ResourcesBlob
{
    /// The same symbol may be present in several shared objects;
    /// base_address is used to pair up only symbols that come from the same one.
    ElfW(Addr) base_address = 0;
    /// Human-readable name of that shared object.
    std::string_view object_name;
    /// Boundaries of the data blob: only the data() pointers of these two views are used.
    std::string_view start;
    std::string_view end;

    /// The bytes located between the `start` and `end` boundaries.
    std::string_view data() const
    {
        const char * const first = start.data();
        const char * const last = end.data();
        assert(last >= first);
        return std::string_view{first, static_cast<size_t>(last - first)};
    }
};
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
struct Data struct Data
{ {
std::vector<Symbol> symbols; std::vector<Symbol> symbols;
std::vector<Object> objects; std::vector<Object> objects;
String build_id; String build_id;
/// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
Resources resources;
}; };
private: private:
Data data; Data data;

View File

@ -1,17 +0,0 @@
#include "getResource.h"
#include <boost/algorithm/string/replace.hpp>
#include <Common/SymbolIndex.h>
/// Looks up an embedded resource by its file name.
/// The file name is first converted into the symbol form generated by `ld -r -b binary`:
/// each of '/', '-' and '.' becomes '_', and each '+' becomes "_PLUS_".
std::string_view getResource(std::string_view name)
{
    std::string symbol_name;
    symbol_name.reserve(name.size());

    for (const char ch : name)
    {
        switch (ch)
        {
            case '/':
            case '-':
            case '.':
                symbol_name += '_';
                break;
            case '+':
                symbol_name += "_PLUS_";
                break;
            default:
                symbol_name += ch;
                break;
        }
    }

    /// If static linking is used, dlsym is not available, so the ELF symbol table is parsed by SymbolIndex instead.
    return DB::SymbolIndex::instance().getResource(symbol_name);
}

View File

@ -1,7 +0,0 @@
#pragma once
#include <string_view>
/// Get resource from binary if exists. Otherwise return empty string view.
/// Resources are data that is embedded into executable at link time.
std::string_view getResource(std::string_view name);

View File

@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
// {0, 0 + 11 * 3600 * 24 + 12, 11}, // {0, 0 + 11 * 3600 * 24 + 12, 11},
})) }))
); );

View File

@ -38,7 +38,6 @@
#include <base/coverage.h> #include <base/coverage.h>
#include <base/sleep.h> #include <base/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h> #include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/ReadBufferFromFileDescriptor.h> #include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>

View File

@ -1,9 +1,12 @@
#include <Common/FrequencyHolder.h> #include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h> #include <Functions/FunctionsTextClassification.h>
#include <memory> #include <memory>
#include <unordered_map>
namespace DB namespace DB
{ {
@ -46,7 +49,7 @@ namespace
return res; return res;
} }
/// Сount how many times each bigram occurs in the text. /// Count how many times each bigram occurs in the text.
template <typename ModelMap> template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats( ALWAYS_INLINE inline void calculateStats(
const UInt8 * data, const UInt8 * data,
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
} }
} }
#endif

View File

@ -5,19 +5,17 @@
#include <Columns/ColumnMap.h> #include <Columns/ColumnMap.h>
#include <Columns/ColumnArray.h> #include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/isValidUTF8.h> #include <Common/isValidUTF8.h>
#include <DataTypes/DataTypeMap.h> #include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h> #include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h> #include <Functions/FunctionsTextClassification.h>
#include <Interpreters/Context.h>
#include <compact_lang_det.h> #include <compact_lang_det.h>
namespace DB namespace DB
{ {
/* Determine language of Unicode UTF-8 text. /* Determine language of Unicode UTF-8 text.

View File

@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h> #include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h> #include <Functions/FunctionsTextClassification.h>
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
} }
} }
#endif

View File

@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h> #include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h> #include <Functions/FunctionsTextClassification.h>
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
} }
} }
#endif

View File

@ -6,10 +6,16 @@
#include <Poco/Util/LayeredConfiguration.h> #include <Poco/Util/LayeredConfiguration.h>
#include <IO/HTTPCommon.h> #include <IO/HTTPCommon.h>
#include <Common/getResource.h>
#include <re2/re2.h> #include <re2/re2.h>
#include <incbin.h>
/// Embedded HTML pages
INCBIN(resource_play_html, "play.html");
INCBIN(resource_dashboard_html, "dashboard.html");
INCBIN(resource_uplot_js, "js/uplot.js");
namespace DB namespace DB
{ {
@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
if (request.getURI().starts_with("/play")) if (request.getURI().starts_with("/play"))
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("play.html"); *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
} }
else if (request.getURI().starts_with("/dashboard")) else if (request.getURI().starts_with("/dashboard"))
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
std::string html(getResource("dashboard.html")); std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
/// Replace a link to external JavaScript file to embedded file. /// Replace a link to external JavaScript file to embedded file.
/// This allows to open the HTML without running a server and to host it on server. /// This allows to open the HTML without running a server and to host it on server.
@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
else if (request.getURI() == "/js/uplot.js") else if (request.getURI() == "/js/uplot.js")
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("js/uplot.js"); *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
} }
else else
{ {

View File

@ -30,7 +30,6 @@ endif()
add_dependencies(generate-source generate-contributors) add_dependencies(generate-source generate-contributors)
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp") set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
add_custom_command( add_custom_command(
OUTPUT StorageSystemLicenses.generated.cpp OUTPUT StorageSystemLicenses.generated.cpp
@ -38,23 +37,13 @@ add_custom_command(
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
# Overlength strings # Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
TARGET information_schema_metadata
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
RESOURCES schemata.sql tables.sql views.sql columns.sql
)
list (SORT storages_system_sources) # Reproducible build list (SORT storages_system_sources) # Reproducible build
add_library(clickhouse_storages_system ${storages_system_sources}) add_library(clickhouse_storages_system ${storages_system_sources})
add_dependencies(clickhouse_storages_system information_schema_metadata)
target_link_libraries(clickhouse_storages_system PRIVATE target_link_libraries(clickhouse_storages_system PRIVATE
dbms dbms
common common
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
clickhouse_common_zookeeper clickhouse_common_zookeeper
clickhouse_parsers clickhouse_parsers
Poco::JSON Poco::JSON
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
) )
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)

View File

@ -3,14 +3,21 @@
#include <Storages/System/attachSystemTablesImpl.h> #include <Storages/System/attachSystemTablesImpl.h>
#include <Parsers/ParserCreateQuery.h> #include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h> #include <Parsers/parseQuery.h>
#include <Common/getResource.h> #include <incbin.h>
/// Embedded SQL definitions
INCBIN(resource_schemata_sql, "schemata.sql");
INCBIN(resource_tables_sql, "tables.sql");
INCBIN(resource_views_sql, "views.sql");
INCBIN(resource_columns_sql, "columns.sql");
namespace DB namespace DB
{ {
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
{ {
try try
{ {
@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
String metadata_resource_name = view_name + ".sql"; String metadata_resource_name = view_name + ".sql";
auto attach_query = getResource(metadata_resource_name); if (query.empty())
if (attach_query.empty())
return; return;
ParserCreateQuery parser; ParserCreateQuery parser;
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
"Attach query from embedded resource " + metadata_resource_name, "Attach query from embedded resource " + metadata_resource_name,
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
{ {
createInformationSchemaView(context, information_schema_database, "schemata"); createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
createInformationSchemaView(context, information_schema_database, "tables"); createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
createInformationSchemaView(context, information_schema_database, "views"); createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
createInformationSchemaView(context, information_schema_database, "columns"); createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
} }
} }

View File

@ -238,10 +238,6 @@ defaultValueOfArgumentType
defaultValueOfTypeName defaultValueOfTypeName
degrees degrees
demangle demangle
detectCharset
detectLanguageUnknown
detectProgrammingLanguage
detectTonality
divide divide
dotProduct dotProduct
dumpColumnStructure dumpColumnStructure

View File

@ -15,5 +15,7 @@ AND name NOT IN (
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3', 'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo', 'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD', 'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test 'lemmatize', 'tokenize', 'stem', 'synonyms',
'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
-- these functions are not enabled in fast test
) ORDER BY name; ) ORDER BY name;