mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge branch 'incbin' into no-export-dynamic
This commit is contained in:
commit
63659c55a1
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -340,3 +340,6 @@
|
||||
[submodule "contrib/c-ares"]
|
||||
path = contrib/c-ares
|
||||
url = https://github.com/c-ares/c-ares.git
|
||||
[submodule "contrib/incbin"]
|
||||
path = contrib/incbin
|
||||
url = https://github.com/graphitemaster/incbin.git
|
||||
|
@ -1,58 +0,0 @@
|
||||
# Embed a set of resource files into a resulting object file.
|
||||
#
|
||||
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
|
||||
#
|
||||
# This will generate a static library target named `<target>`, which contains the contents of
|
||||
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
|
||||
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
|
||||
#
|
||||
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
|
||||
# These are:
|
||||
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
|
||||
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
|
||||
# 3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
|
||||
#
|
||||
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
|
||||
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
|
||||
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
|
||||
macro(clickhouse_embed_binaries)
|
||||
set(one_value_args TARGET RESOURCE_DIR)
|
||||
set(resources RESOURCES)
|
||||
cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
|
||||
|
||||
if (NOT DEFINED EMBED_TARGET)
|
||||
message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED EMBED_RESOURCE_DIR)
|
||||
set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
endif()
|
||||
|
||||
list(LENGTH EMBED_RESOURCES N_RESOURCES)
|
||||
if (N_RESOURCES LESS 1)
|
||||
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
|
||||
endif()
|
||||
|
||||
add_library("${EMBED_TARGET}" STATIC)
|
||||
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
|
||||
|
||||
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
|
||||
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
|
||||
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
|
||||
|
||||
# Normalize the name of the resource.
|
||||
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
|
||||
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
|
||||
|
||||
# Generate the configured assembly file in the output directory.
|
||||
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
|
||||
|
||||
# Set the include directory for relative paths specified for `.incbin` directive.
|
||||
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
|
||||
|
||||
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
|
||||
set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
|
||||
endforeach()
|
||||
endmacro()
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
|
||||
add_contrib (nuraft-cmake NuRaft)
|
||||
add_contrib (fast_float-cmake fast_float)
|
||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||
add_contrib (incbin-cmake incbin)
|
||||
|
||||
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
||||
if (ENABLE_NLP)
|
||||
add_contrib (libstemmer-c-cmake libstemmer_c)
|
||||
add_contrib (wordnet-blast-cmake wordnet-blast)
|
||||
add_contrib (lemmagen-c-cmake lemmagen-c)
|
||||
add_contrib (nlp-data-cmake nlp-data)
|
||||
add_contrib (cld2-cmake cld2)
|
||||
endif()
|
||||
|
||||
|
@ -1,4 +1,3 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
|
||||
|
||||
set (SRCS
|
||||
@ -23,12 +22,10 @@ if (OS_FREEBSD)
|
||||
endif ()
|
||||
|
||||
# Related to time_zones table:
|
||||
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
|
||||
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
|
||||
# as the library that's built using embedded tzdata is also specific to OS_LINUX
|
||||
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
||||
# TimeZones.generated.cpp is autogenerated each time during a build
|
||||
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
|
||||
# remove existing copies so that it's generated fresh on each build.
|
||||
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
|
||||
file(REMOVE ${TIMEZONES_FILE})
|
||||
|
||||
# get the list of timezones from tzdata shipped with cctz
|
||||
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
|
||||
@ -36,28 +33,45 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
|
||||
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
|
||||
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
|
||||
|
||||
set(TIMEZONE_RESOURCE_FILES)
|
||||
|
||||
# each file in that dir (except for *.tab files and localtime) stores the info about a timezone
|
||||
execute_process(COMMAND
|
||||
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
OUTPUT_VARIABLE TIMEZONES)
|
||||
|
||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
|
||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
|
||||
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
|
||||
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
|
||||
|
||||
set (COUNTER 1)
|
||||
foreach(TIMEZONE ${TIMEZONES})
|
||||
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n")
|
||||
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||
endforeach(TIMEZONE)
|
||||
|
||||
file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
|
||||
|
||||
foreach(TIMEZONE ${TIMEZONES})
|
||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
|
||||
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
|
||||
file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
|
||||
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||
endforeach(TIMEZONE)
|
||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
|
||||
clickhouse_embed_binaries(
|
||||
TARGET tzdata
|
||||
RESOURCE_DIR "${TZDIR}"
|
||||
RESOURCES ${TIMEZONE_RESOURCE_FILES}
|
||||
)
|
||||
add_dependencies(_cctz tzdata)
|
||||
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
|
||||
file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
|
||||
|
||||
file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
|
||||
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
|
||||
|
||||
set (COUNTER 1)
|
||||
foreach(TIMEZONE ${TIMEZONES})
|
||||
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
|
||||
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||
endforeach(TIMEZONE)
|
||||
|
||||
file(APPEND ${TIMEZONES_FILE} " return {};\n")
|
||||
file(APPEND ${TIMEZONES_FILE} "}\n")
|
||||
|
||||
add_library (tzdata ${TIMEZONES_FILE})
|
||||
target_link_libraries(tzdata ch_contrib::incbin)
|
||||
target_include_directories(tzdata PRIVATE ${TZDIR})
|
||||
target_link_libraries(_cctz tzdata)
|
||||
|
||||
add_library(ch_contrib::cctz ALIAS _cctz)
|
||||
|
1
contrib/incbin
vendored
Submodule
1
contrib/incbin
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
|
4
contrib/incbin-cmake/CMakeLists.txt
Normal file
4
contrib/incbin-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
|
||||
add_library(_incbin INTERFACE)
|
||||
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
|
||||
add_library(ch_contrib::incbin ALIAS _incbin)
|
@ -1,15 +0,0 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
|
||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
|
||||
|
||||
add_library (_nlp_data INTERFACE)
|
||||
|
||||
clickhouse_embed_binaries(
|
||||
TARGET nlp_dictionaries
|
||||
RESOURCE_DIR "${LIBRARY_DIR}"
|
||||
RESOURCES charset.zst tonality_ru.zst programming.zst
|
||||
)
|
||||
|
||||
add_dependencies(_nlp_data nlp_dictionaries)
|
||||
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
add_library(ch_contrib::nlp_data ALIAS _nlp_data)
|
@ -147,6 +147,7 @@ function clone_submodules
|
||||
contrib/simdjson
|
||||
contrib/liburing
|
||||
contrib/libfiu
|
||||
contrib/incbin
|
||||
)
|
||||
|
||||
git submodule sync
|
||||
|
@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK
|
||||
)
|
||||
|
||||
clickhouse_program_add_library(install)
|
||||
|
||||
# For incbin
|
||||
target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")
|
||||
|
@ -20,10 +20,7 @@
|
||||
#include <Common/formatReadable.h>
|
||||
#include <Common/Config/ConfigProcessor.h>
|
||||
#include <Common/OpenSSLHelpers.h>
|
||||
#include <base/hex.h>
|
||||
#include <Common/getResource.h>
|
||||
#include <base/sleep.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
@ -35,6 +32,12 @@
|
||||
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
|
||||
#include <incbin.h>
|
||||
|
||||
/// Embedded configuration files used inside the install program
|
||||
INCBIN(resource_config_xml, "config.xml");
|
||||
INCBIN(resource_users_xml, "users.xml");
|
||||
|
||||
|
||||
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
|
||||
* It also makes it possible to avoid a dependency on systemd, upstart, SysV init.
|
||||
@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
|
||||
if (!fs::exists(main_config_file))
|
||||
{
|
||||
std::string_view main_config_content = getResource("config.xml");
|
||||
std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
|
||||
if (main_config_content.empty())
|
||||
{
|
||||
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
|
||||
@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
|
||||
if (!fs::exists(users_config_file))
|
||||
{
|
||||
std::string_view users_config_content = getResource("users.xml");
|
||||
std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
|
||||
if (users_config_content.empty())
|
||||
{
|
||||
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
|
||||
|
@ -1,16 +1,3 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
|
||||
if (OS_LINUX)
|
||||
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
# for some reason INTERFACE linkage doesn't work for standalone binary
|
||||
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
endif ()
|
||||
|
||||
clickhouse_embed_binaries(
|
||||
TARGET clickhouse_keeper_configs
|
||||
RESOURCES keeper_config.xml keeper_embedded.xml
|
||||
)
|
||||
|
||||
set(CLICKHOUSE_KEEPER_SOURCES
|
||||
Keeper.cpp
|
||||
)
|
||||
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
|
||||
clickhouse_program_add(keeper)
|
||||
|
||||
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
|
||||
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
|
||||
|
||||
if (BUILD_STANDALONE_KEEPER)
|
||||
# Straight list of all required sources
|
||||
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
|
||||
)
|
||||
|
||||
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
|
||||
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
||||
|
||||
if (SPLIT_DEBUG_SYMBOLS)
|
||||
|
@ -457,8 +457,10 @@ try
|
||||
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
||||
|
||||
std::vector<std::string> extra_paths = {include_from_path};
|
||||
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
|
||||
if (!key_path.empty()) extra_paths.emplace_back(key_path);
|
||||
if (!cert_path.empty())
|
||||
extra_paths.emplace_back(cert_path);
|
||||
if (!key_path.empty())
|
||||
extra_paths.emplace_back(key_path);
|
||||
|
||||
/// ConfigReloader has too strict parameters which are redundant in our case
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||
|
@ -1,12 +1,8 @@
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
|
||||
set(CLICKHOUSE_SERVER_SOURCES
|
||||
MetricsTransmitter.cpp
|
||||
Server.cpp
|
||||
)
|
||||
|
||||
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
||||
|
||||
set (CLICKHOUSE_SERVER_LINK
|
||||
PRIVATE
|
||||
clickhouse_aggregate_functions
|
||||
@ -33,10 +29,4 @@ endif()
|
||||
|
||||
clickhouse_program_add(server)
|
||||
|
||||
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
|
||||
|
||||
clickhouse_embed_binaries(
|
||||
TARGET clickhouse_server_configs
|
||||
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
|
||||
)
|
||||
add_dependencies(clickhouse-server-lib clickhouse_server_configs)
|
||||
target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
@ -128,6 +128,10 @@
|
||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
#endif
|
||||
|
||||
#include <incbin.h>
|
||||
/// A minimal file used when the server is run without installation
|
||||
INCBIN(resource_embedded_xml, "embedded.xml");
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric Revision;
|
||||
@ -393,6 +397,7 @@ int Server::run()
|
||||
|
||||
void Server::initialize(Poco::Util::Application & self)
|
||||
{
|
||||
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
|
||||
BaseDaemon::initialize(self);
|
||||
logger().information("starting up");
|
||||
|
||||
@ -1105,8 +1110,10 @@ try
|
||||
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
||||
|
||||
std::vector<std::string> extra_paths = {include_from_path};
|
||||
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
|
||||
if (!key_path.empty()) extra_paths.emplace_back(key_path);
|
||||
if (!cert_path.empty())
|
||||
extra_paths.emplace_back(cert_path);
|
||||
if (!key_path.empty())
|
||||
extra_paths.emplace_back(key_path);
|
||||
|
||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||
config_path,
|
||||
|
0
programs/server/resources.cpp
Normal file
0
programs/server/resources.cpp
Normal file
@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
|
||||
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
|
||||
endif()
|
||||
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
|
||||
|
||||
add_subdirectory(Access/Common)
|
||||
add_subdirectory(Common/ZooKeeper)
|
||||
@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
|
||||
endforeach ()
|
||||
endmacro ()
|
||||
|
||||
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
|
||||
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
|
||||
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
|
||||
|
||||
if (TARGET ch_contrib::llvm)
|
||||
@ -561,7 +561,7 @@ if (ENABLE_NLP)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
|
||||
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
|
||||
target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::ulid)
|
||||
|
@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
|
||||
endif()
|
||||
|
||||
if (ENABLE_MYSQL)
|
||||
add_subdirectory (mysqlxx)
|
||||
add_subdirectory(mysqlxx)
|
||||
endif ()
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/getResource.h>
|
||||
#include <Common/XMLUtils.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/errnoToString.h>
|
||||
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
|
||||
Poco::Logger::destroy("ConfigProcessor");
|
||||
}
|
||||
|
||||
static std::unordered_map<std::string, std::string_view> embedded_configs;
|
||||
|
||||
void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
|
||||
{
|
||||
embedded_configs[name] = content;
|
||||
}
|
||||
|
||||
|
||||
/// Vector containing the name of the element and a sorted list of attribute names and values
|
||||
/// (except "remove" and "replace" attributes).
|
||||
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
|
||||
{
|
||||
std::string value = node->nodeValue();
|
||||
|
||||
bool replace_occured = false;
|
||||
bool replace_occurred = false;
|
||||
size_t pos;
|
||||
while ((pos = value.find(substitution.first)) != std::string::npos)
|
||||
{
|
||||
value.replace(pos, substitution.first.length(), substitution.second);
|
||||
replace_occured = true;
|
||||
replace_occurred = true;
|
||||
}
|
||||
|
||||
if (replace_occured)
|
||||
if (replace_occurred)
|
||||
node->setNodeValue(value);
|
||||
}
|
||||
}
|
||||
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
|
||||
}
|
||||
else
|
||||
{
|
||||
/// These embedded files are added during build with some cmake magic.
|
||||
/// Look at the end of programs/server/CMakeLists.txt.
|
||||
std::string embedded_name;
|
||||
if (path == "config.xml")
|
||||
embedded_name = "embedded.xml";
|
||||
|
||||
if (path == "keeper_config.xml")
|
||||
embedded_name = "keeper_embedded.xml";
|
||||
|
||||
/// When we can use config embedded in binary.
|
||||
if (!embedded_name.empty())
|
||||
/// When we can use a config embedded in the binary.
|
||||
if (auto it = embedded_configs.find(path); it != embedded_configs.end())
|
||||
{
|
||||
auto resource = getResource(embedded_name);
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
|
||||
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
|
||||
config = dom_parser.parseMemory(resource.data(), resource.size());
|
||||
config = dom_parser.parseMemory(it->second.data(), it->second.size());
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
|
||||
}
|
||||
|
||||
std::vector<std::string> contributing_files;
|
||||
|
@ -65,6 +65,9 @@ public:
|
||||
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
|
||||
const zkutil::EventPtr & zk_changed_event = nullptr);
|
||||
|
||||
/// These configurations will be used if there is no configuration file.
|
||||
static void registerEmbeddedConfig(std::string name, std::string_view content);
|
||||
|
||||
|
||||
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
|
||||
/// The resulting XML document is saved into a file with the name
|
||||
|
@ -3,7 +3,6 @@
|
||||
#include <cctz/civil_time.h>
|
||||
#include <cctz/time_zone.h>
|
||||
#include <cctz/zone_info_source.h>
|
||||
#include <Common/getResource.h>
|
||||
#include <Poco/Exception.h>
|
||||
|
||||
#include <algorithm>
|
||||
@ -11,6 +10,11 @@
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
|
||||
|
||||
/// Embedded timezones.
|
||||
std::string_view getTimeZone(const char * name);
|
||||
|
||||
|
||||
namespace
|
||||
@ -249,9 +253,10 @@ namespace cctz_extension
|
||||
const std::string & name,
|
||||
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
|
||||
{
|
||||
std::string_view resource = getResource(name);
|
||||
if (!resource.empty())
|
||||
return std::make_unique<Source>(resource.data(), resource.size());
|
||||
std::string_view tz_file = getTimeZone(name.data());
|
||||
|
||||
if (!tz_file.empty())
|
||||
return std::make_unique<Source>(tz_file.data(), tz_file.size());
|
||||
|
||||
return fallback(name);
|
||||
}
|
||||
|
185
src/Common/FrequencyHolder.cpp
Normal file
185
src/Common/FrequencyHolder.cpp
Normal file
@ -0,0 +1,185 @@
|
||||
#include <Common/FrequencyHolder.h>
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <incbin.h>
|
||||
|
||||
/// Embedded zstd-compressed dictionaries (charset frequencies, emotional dictionary, programming language frequencies)
|
||||
INCBIN(resource_charset_zst, "charset.zst");
|
||||
INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
|
||||
INCBIN(resource_programming_zst, "programming.zst");
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
|
||||
FrequencyHolder & FrequencyHolder::getInstance()
|
||||
{
|
||||
static FrequencyHolder instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
FrequencyHolder::FrequencyHolder()
|
||||
{
|
||||
loadEmotionalDict();
|
||||
loadEncodingsFrequency();
|
||||
loadProgrammingFrequency();
|
||||
}
|
||||
|
||||
void FrequencyHolder::loadEncodingsFrequency()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
|
||||
|
||||
LOG_TRACE(log, "Loading embedded charset frequencies");
|
||||
|
||||
std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
|
||||
|
||||
String line;
|
||||
UInt16 bigram;
|
||||
Float64 frequency;
|
||||
String charset_name;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
// Start loading a new charset
|
||||
if (line.starts_with("// "))
|
||||
{
|
||||
// Skip "// "
|
||||
buf_line.ignore(3);
|
||||
readString(charset_name, buf_line);
|
||||
|
||||
/* In our dictionary we have lines with form: <Language>_<Charset>
|
||||
* If we need to find language of data, we return <Language>
|
||||
* If we need to find charset of data, we return <Charset>.
|
||||
*/
|
||||
size_t sep = charset_name.find('_');
|
||||
|
||||
Encoding enc;
|
||||
enc.lang = charset_name.substr(0, sep);
|
||||
enc.name = charset_name.substr(sep + 1);
|
||||
encodings_freq.push_back(std::move(enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
readIntText(bigram, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(frequency, buf_line);
|
||||
|
||||
encodings_freq.back().map[bigram] = frequency;
|
||||
}
|
||||
}
|
||||
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
|
||||
}
|
||||
|
||||
void FrequencyHolder::loadEmotionalDict()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
|
||||
LOG_TRACE(log, "Loading embedded emotional dictionary");
|
||||
|
||||
std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
|
||||
|
||||
String line;
|
||||
String word;
|
||||
Float64 tonality;
|
||||
size_t count = 0;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
readStringUntilWhitespace(word, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(tonality, buf_line);
|
||||
|
||||
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
|
||||
emotional_dict[ref] = tonality;
|
||||
++count;
|
||||
}
|
||||
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
|
||||
}
|
||||
|
||||
void FrequencyHolder::loadProgrammingFrequency()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
|
||||
|
||||
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
|
||||
|
||||
std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
|
||||
|
||||
String line;
|
||||
String bigram;
|
||||
Float64 frequency;
|
||||
String programming_language;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
// Start loading a new language
|
||||
if (line.starts_with("// "))
|
||||
{
|
||||
// Skip "// "
|
||||
buf_line.ignore(3);
|
||||
readString(programming_language, buf_line);
|
||||
|
||||
Language lang;
|
||||
lang.name = programming_language;
|
||||
programming_freq.push_back(std::move(lang));
|
||||
}
|
||||
else
|
||||
{
|
||||
readStringUntilWhitespace(bigram, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(frequency, buf_line);
|
||||
|
||||
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
|
||||
programming_freq.back().map[ref] = frequency;
|
||||
}
|
||||
}
|
||||
LOG_TRACE(log, "Programming languages frequencies was added");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,5 +1,9 @@
|
||||
#pragma once
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <base/StringRef.h>
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
@ -7,7 +11,6 @@
|
||||
#include <unordered_map>
|
||||
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/getResource.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
@ -20,11 +23,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int FILE_DOESNT_EXIST;
|
||||
}
|
||||
|
||||
/// FrequencyHolder class is responsible for storing and loading dictionaries
|
||||
/// needed for text classification functions:
|
||||
///
|
||||
@ -56,11 +54,7 @@ public:
|
||||
using EncodingMap = HashMap<UInt16, Float64>;
|
||||
using EncodingContainer = std::vector<Encoding>;
|
||||
|
||||
static FrequencyHolder & getInstance()
|
||||
{
|
||||
static FrequencyHolder instance;
|
||||
return instance;
|
||||
}
|
||||
static FrequencyHolder & getInstance();
|
||||
|
||||
const Map & getEmotionalDict() const
|
||||
{
|
||||
@ -78,161 +72,11 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
FrequencyHolder();
|
||||
|
||||
FrequencyHolder()
|
||||
{
|
||||
loadEmotionalDict();
|
||||
loadEncodingsFrequency();
|
||||
loadProgrammingFrequency();
|
||||
}
|
||||
|
||||
void loadEncodingsFrequency()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
|
||||
|
||||
LOG_TRACE(log, "Loading embedded charset frequencies");
|
||||
|
||||
auto resource = getResource("charset.zst");
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
|
||||
|
||||
String line;
|
||||
UInt16 bigram;
|
||||
Float64 frequency;
|
||||
String charset_name;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
// Start loading a new charset
|
||||
if (line.starts_with("// "))
|
||||
{
|
||||
// Skip "// "
|
||||
buf_line.ignore(3);
|
||||
readString(charset_name, buf_line);
|
||||
|
||||
/* In our dictionary we have lines with form: <Language>_<Charset>
|
||||
* If we need to find language of data, we return <Language>
|
||||
* If we need to find charset of data, we return <Charset>.
|
||||
*/
|
||||
size_t sep = charset_name.find('_');
|
||||
|
||||
Encoding enc;
|
||||
enc.lang = charset_name.substr(0, sep);
|
||||
enc.name = charset_name.substr(sep + 1);
|
||||
encodings_freq.push_back(std::move(enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
readIntText(bigram, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(frequency, buf_line);
|
||||
|
||||
encodings_freq.back().map[bigram] = frequency;
|
||||
}
|
||||
}
|
||||
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
|
||||
}
|
||||
|
||||
void loadEmotionalDict()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
|
||||
LOG_TRACE(log, "Loading embedded emotional dictionary");
|
||||
|
||||
auto resource = getResource("tonality_ru.zst");
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
|
||||
|
||||
String line;
|
||||
String word;
|
||||
Float64 tonality;
|
||||
size_t count = 0;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
readStringUntilWhitespace(word, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(tonality, buf_line);
|
||||
|
||||
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
|
||||
emotional_dict[ref] = tonality;
|
||||
++count;
|
||||
}
|
||||
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
|
||||
}
|
||||
|
||||
void loadProgrammingFrequency()
|
||||
{
|
||||
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
|
||||
|
||||
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
|
||||
|
||||
auto resource = getResource("programming.zst");
|
||||
if (resource.empty())
|
||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
|
||||
|
||||
String line;
|
||||
String bigram;
|
||||
Float64 frequency;
|
||||
String programming_language;
|
||||
|
||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||
ZstdInflatingReadBuffer in(std::move(buf));
|
||||
|
||||
while (!in.eof())
|
||||
{
|
||||
readString(line, in);
|
||||
in.ignore();
|
||||
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
ReadBufferFromString buf_line(line);
|
||||
|
||||
// Start loading a new language
|
||||
if (line.starts_with("// "))
|
||||
{
|
||||
// Skip "// "
|
||||
buf_line.ignore(3);
|
||||
readString(programming_language, buf_line);
|
||||
|
||||
Language lang;
|
||||
lang.name = programming_language;
|
||||
programming_freq.push_back(std::move(lang));
|
||||
}
|
||||
else
|
||||
{
|
||||
readStringUntilWhitespace(bigram, buf_line);
|
||||
buf_line.ignore();
|
||||
readFloatText(frequency, buf_line);
|
||||
|
||||
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
|
||||
programming_freq.back().map[ref] = frequency;
|
||||
}
|
||||
}
|
||||
LOG_TRACE(log, "Programming languages frequencies was added");
|
||||
}
|
||||
void loadEncodingsFrequency();
|
||||
void loadEmotionalDict();
|
||||
void loadProgrammingFrequency();
|
||||
|
||||
Arena string_pool;
|
||||
|
||||
@ -241,3 +85,5 @@ private:
|
||||
EncodingContainer encodings_freq;
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -88,50 +88,13 @@ namespace
|
||||
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
|
||||
|
||||
|
||||
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
|
||||
{
|
||||
const char * char_address = static_cast<const char *>(address);
|
||||
|
||||
if (name.starts_with("_binary_") || name.starts_with("binary_"))
|
||||
{
|
||||
if (name.ends_with("_start"))
|
||||
{
|
||||
name = name.substr((name[0] == '_') + strlen("binary_"));
|
||||
name = name.substr(0, name.size() - strlen("_start"));
|
||||
|
||||
auto & resource = resources[name];
|
||||
if (!resource.base_address || resource.base_address == base_address)
|
||||
{
|
||||
resource.base_address = base_address;
|
||||
resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
|
||||
resource.object_name = object_name;
|
||||
}
|
||||
}
|
||||
if (name.ends_with("_end"))
|
||||
{
|
||||
name = name.substr((name[0] == '_') + strlen("binary_"));
|
||||
name = name.substr(0, name.size() - strlen("_end"));
|
||||
|
||||
auto & resource = resources[name];
|
||||
if (!resource.base_address || resource.base_address == base_address)
|
||||
{
|
||||
resource.base_address = base_address;
|
||||
resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
|
||||
resource.object_name = object_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Based on the code of musl-libc and the answer of Kanalpiroge on
|
||||
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
|
||||
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
|
||||
/// but will work if we cannot find or parse ELF files.
|
||||
void collectSymbolsFromProgramHeaders(
|
||||
dl_phdr_info * info,
|
||||
std::vector<SymbolIndex::Symbol> & symbols,
|
||||
SymbolIndex::Resources & resources)
|
||||
std::vector<SymbolIndex::Symbol> & symbols)
|
||||
{
|
||||
/* Iterate over all headers of the current shared lib
|
||||
* (first call is for the executable itself)
|
||||
@ -265,9 +228,6 @@ void collectSymbolsFromProgramHeaders(
|
||||
/// We are not interested in empty symbols.
|
||||
if (elf_sym[sym_index].st_size)
|
||||
symbols.push_back(symbol);
|
||||
|
||||
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
|
||||
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
|
||||
}
|
||||
|
||||
break;
|
||||
@ -303,8 +263,7 @@ void collectSymbolsFromELFSymbolTable(
|
||||
const Elf & elf,
|
||||
const Elf::Section & symbol_table,
|
||||
const Elf::Section & string_table,
|
||||
std::vector<SymbolIndex::Symbol> & symbols,
|
||||
SymbolIndex::Resources & resources)
|
||||
std::vector<SymbolIndex::Symbol> & symbols)
|
||||
{
|
||||
/// Iterate symbol table.
|
||||
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
|
||||
@ -334,8 +293,6 @@ void collectSymbolsFromELFSymbolTable(
|
||||
|
||||
if (symbol_table_entry->st_size)
|
||||
symbols.push_back(symbol);
|
||||
|
||||
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
|
||||
}
|
||||
}
|
||||
|
||||
@ -345,8 +302,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
|
||||
const Elf & elf,
|
||||
unsigned section_header_type,
|
||||
const char * string_table_name,
|
||||
std::vector<SymbolIndex::Symbol> & symbols,
|
||||
SymbolIndex::Resources & resources)
|
||||
std::vector<SymbolIndex::Symbol> & symbols)
|
||||
{
|
||||
std::optional<Elf::Section> symbol_table;
|
||||
std::optional<Elf::Section> string_table;
|
||||
@ -364,7 +320,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
|
||||
return false;
|
||||
}
|
||||
|
||||
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
|
||||
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -373,7 +329,6 @@ void collectSymbolsFromELF(
|
||||
dl_phdr_info * info,
|
||||
std::vector<SymbolIndex::Symbol> & symbols,
|
||||
std::vector<SymbolIndex::Object> & objects,
|
||||
SymbolIndex::Resources & resources,
|
||||
String & build_id)
|
||||
{
|
||||
String object_name;
|
||||
@ -485,11 +440,11 @@ void collectSymbolsFromELF(
|
||||
object.name = object_name;
|
||||
objects.push_back(std::move(object));
|
||||
|
||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
|
||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
|
||||
|
||||
/// Unneeded if they were parsed from "program headers" of loaded objects.
|
||||
#if defined USE_MUSL
|
||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
|
||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -502,8 +457,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
|
||||
{
|
||||
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
|
||||
|
||||
collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
|
||||
collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
|
||||
collectSymbolsFromProgramHeaders(info, data.symbols);
|
||||
collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
|
||||
|
||||
/* Continue iterations */
|
||||
return 0;
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Common/Elf.h>
|
||||
#include <boost/noncopyable.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -45,44 +46,15 @@ public:
|
||||
const std::vector<Symbol> & symbols() const { return data.symbols; }
|
||||
const std::vector<Object> & objects() const { return data.objects; }
|
||||
|
||||
std::string_view getResource(String name) const
|
||||
{
|
||||
if (auto it = data.resources.find(name); it != data.resources.end())
|
||||
return it->second.data();
|
||||
return {};
|
||||
}
|
||||
|
||||
/// The BuildID that is generated by compiler.
|
||||
String getBuildID() const { return data.build_id; }
|
||||
String getBuildIDHex() const;
|
||||
|
||||
struct ResourcesBlob
|
||||
{
|
||||
/// Symbol can be presented in multiple shared objects,
|
||||
/// base_address will be used to compare only symbols from the same SO.
|
||||
ElfW(Addr) base_address = 0;
|
||||
/// Just a human name of the SO.
|
||||
std::string_view object_name;
|
||||
/// Data blob.
|
||||
std::string_view start;
|
||||
std::string_view end;
|
||||
|
||||
std::string_view data() const
|
||||
{
|
||||
assert(end.data() >= start.data());
|
||||
return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
|
||||
}
|
||||
};
|
||||
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
|
||||
|
||||
struct Data
|
||||
{
|
||||
std::vector<Symbol> symbols;
|
||||
std::vector<Object> objects;
|
||||
String build_id;
|
||||
|
||||
/// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
|
||||
Resources resources;
|
||||
};
|
||||
private:
|
||||
Data data;
|
||||
|
@ -1,17 +0,0 @@
|
||||
#include "getResource.h"
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
#include <Common/SymbolIndex.h>
|
||||
|
||||
|
||||
std::string_view getResource(std::string_view name)
|
||||
{
|
||||
// Convert the resource file name into the form generated by `ld -r -b binary`.
|
||||
std::string name_replaced(name);
|
||||
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
|
||||
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
|
||||
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
|
||||
boost::replace_all(name_replaced, "+", "_PLUS_");
|
||||
|
||||
/// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself.
|
||||
return DB::SymbolIndex::instance().getResource(name_replaced);
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
|
||||
/// Get resource from binary if exists. Otherwise return empty string view.
|
||||
/// Resources are data that is embedded into executable at link time.
|
||||
std::string_view getResource(std::string_view name);
|
@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
|
||||
// {0, 0 + 11 * 3600 * 24 + 12, 11},
|
||||
}))
|
||||
);
|
||||
|
||||
|
@ -38,7 +38,6 @@
|
||||
#include <base/coverage.h>
|
||||
#include <base/sleep.h>
|
||||
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
@ -1,9 +1,12 @@
|
||||
#include <Common/FrequencyHolder.h>
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsTextClassification.h>
|
||||
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -46,7 +49,7 @@ namespace
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Сount how many times each bigram occurs in the text.
|
||||
/// Count how many times each bigram occurs in the text.
|
||||
template <typename ModelMap>
|
||||
ALWAYS_INLINE inline void calculateStats(
|
||||
const UInt8 * data,
|
||||
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -5,19 +5,17 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/isValidUTF8.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsTextClassification.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include <compact_lang_det.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/* Determine language of Unicode UTF-8 text.
|
||||
|
@ -1,4 +1,7 @@
|
||||
#include <Common/FrequencyHolder.h>
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsTextClassification.h>
|
||||
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,4 +1,7 @@
|
||||
#include <Common/FrequencyHolder.h>
|
||||
|
||||
#if USE_NLP
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionsTextClassification.h>
|
||||
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -6,10 +6,16 @@
|
||||
#include <Poco/Util/LayeredConfiguration.h>
|
||||
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <Common/getResource.h>
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <incbin.h>
|
||||
|
||||
/// Embedded HTML pages
|
||||
INCBIN(resource_play_html, "play.html");
|
||||
INCBIN(resource_dashboard_html, "dashboard.html");
|
||||
INCBIN(resource_uplot_js, "js/uplot.js");
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
|
||||
if (request.getURI().starts_with("/play"))
|
||||
{
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||
*response.send() << getResource("play.html");
|
||||
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
|
||||
}
|
||||
else if (request.getURI().starts_with("/dashboard"))
|
||||
{
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||
|
||||
std::string html(getResource("dashboard.html"));
|
||||
std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
|
||||
|
||||
/// Replace a link to external JavaScript file to embedded file.
|
||||
/// This allows to open the HTML without running a server and to host it on server.
|
||||
@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
|
||||
else if (request.getURI() == "/js/uplot.js")
|
||||
{
|
||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||
*response.send() << getResource("js/uplot.js");
|
||||
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -30,7 +30,6 @@ endif()
|
||||
add_dependencies(generate-source generate-contributors)
|
||||
|
||||
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
|
||||
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT StorageSystemLicenses.generated.cpp
|
||||
@ -38,23 +37,13 @@ add_custom_command(
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
|
||||
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
|
||||
|
||||
# Overlength strings
|
||||
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
|
||||
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
||||
clickhouse_embed_binaries(
|
||||
TARGET information_schema_metadata
|
||||
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
|
||||
RESOURCES schemata.sql tables.sql views.sql columns.sql
|
||||
)
|
||||
|
||||
list (SORT storages_system_sources) # Reproducible build
|
||||
add_library(clickhouse_storages_system ${storages_system_sources})
|
||||
|
||||
add_dependencies(clickhouse_storages_system information_schema_metadata)
|
||||
|
||||
target_link_libraries(clickhouse_storages_system PRIVATE
|
||||
dbms
|
||||
common
|
||||
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
|
||||
clickhouse_common_zookeeper
|
||||
clickhouse_parsers
|
||||
Poco::JSON
|
||||
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
|
||||
)
|
||||
|
||||
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)
|
||||
|
@ -3,14 +3,21 @@
|
||||
#include <Storages/System/attachSystemTablesImpl.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Common/getResource.h>
|
||||
#include <incbin.h>
|
||||
|
||||
/// Embedded SQL definitions
|
||||
INCBIN(resource_schemata_sql, "schemata.sql");
|
||||
INCBIN(resource_tables_sql, "tables.sql");
|
||||
INCBIN(resource_views_sql, "views.sql");
|
||||
INCBIN(resource_columns_sql, "columns.sql");
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
|
||||
|
||||
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
|
||||
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
|
||||
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
|
||||
|
||||
String metadata_resource_name = view_name + ".sql";
|
||||
auto attach_query = getResource(metadata_resource_name);
|
||||
if (attach_query.empty())
|
||||
if (query.empty())
|
||||
return;
|
||||
|
||||
ParserCreateQuery parser;
|
||||
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
|
||||
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
|
||||
"Attach query from embedded resource " + metadata_resource_name,
|
||||
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
|
||||
@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
|
||||
|
||||
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
|
||||
{
|
||||
createInformationSchemaView(context, information_schema_database, "schemata");
|
||||
createInformationSchemaView(context, information_schema_database, "tables");
|
||||
createInformationSchemaView(context, information_schema_database, "views");
|
||||
createInformationSchemaView(context, information_schema_database, "columns");
|
||||
createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
|
||||
createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
|
||||
createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
|
||||
createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -238,10 +238,6 @@ defaultValueOfArgumentType
|
||||
defaultValueOfTypeName
|
||||
degrees
|
||||
demangle
|
||||
detectCharset
|
||||
detectLanguageUnknown
|
||||
detectProgrammingLanguage
|
||||
detectTonality
|
||||
divide
|
||||
dotProduct
|
||||
dumpColumnStructure
|
||||
|
@ -15,5 +15,7 @@ AND name NOT IN (
|
||||
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
|
||||
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
|
||||
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
|
||||
'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
|
||||
'lemmatize', 'tokenize', 'stem', 'synonyms',
|
||||
'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
|
||||
-- these functions are not enabled in fast test
|
||||
) ORDER BY name;
|
||||
|
Loading…
Reference in New Issue
Block a user