mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge branch 'incbin' into no-export-dynamic
This commit is contained in:
commit
63659c55a1
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -340,3 +340,6 @@
|
|||||||
[submodule "contrib/c-ares"]
|
[submodule "contrib/c-ares"]
|
||||||
path = contrib/c-ares
|
path = contrib/c-ares
|
||||||
url = https://github.com/c-ares/c-ares.git
|
url = https://github.com/c-ares/c-ares.git
|
||||||
|
[submodule "contrib/incbin"]
|
||||||
|
path = contrib/incbin
|
||||||
|
url = https://github.com/graphitemaster/incbin.git
|
||||||
|
@ -1,58 +0,0 @@
|
|||||||
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the list of resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
#   1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
#   2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
#   3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
#
# NOTE: this is a macro, so every variable set below (EMBED_*, N_RESOURCES, SYMBOL_NAME, ...)
# stays visible in the caller's scope after the call.
macro(clickhouse_embed_binaries)
    set(one_value_args TARGET RESOURCE_DIR)
    set(resources RESOURCES)
    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})

    if (NOT DEFINED EMBED_TARGET)
        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
    endif()

    if (NOT DEFINED EMBED_RESOURCE_DIR)
        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()

    list(LENGTH EMBED_RESOURCES N_RESOURCES)
    if (N_RESOURCES LESS 1)
        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
    endif()

    # The library has only generated assembly sources, so the linker language is set explicitly.
    add_library("${EMBED_TARGET}" STATIC)
    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)

    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")

    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
        # ASSEMBLY_FILE_NAME, BINARY_FILE_NAME and SYMBOL_NAME are in scope while the template is configured.
        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
        set(BINARY_FILE_NAME "${RESOURCE_FILE}")

        # Normalize the name of the resource.
        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")

        # Generate the configured assembly file in the output directory.
        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)

        # Set the include directory for relative paths specified for `.incbin` directive.
        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")

        # Re-assemble the object whenever the embedded file itself changes.
        # OBJECT_DEPENDS is a *source file* property that expects full paths; setting it on the
        # target (as was done before) has no effect and was overwritten on every iteration.
        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY OBJECT_DEPENDS "${EMBED_RESOURCE_DIR}/${RESOURCE_FILE}")

        target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
    endforeach()
endmacro()
|
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
|
|||||||
add_contrib (nuraft-cmake NuRaft)
|
add_contrib (nuraft-cmake NuRaft)
|
||||||
add_contrib (fast_float-cmake fast_float)
|
add_contrib (fast_float-cmake fast_float)
|
||||||
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
add_contrib (datasketches-cpp-cmake datasketches-cpp)
|
||||||
|
add_contrib (incbin-cmake incbin)
|
||||||
|
|
||||||
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
|
||||||
if (ENABLE_NLP)
|
if (ENABLE_NLP)
|
||||||
add_contrib (libstemmer-c-cmake libstemmer_c)
|
add_contrib (libstemmer-c-cmake libstemmer_c)
|
||||||
add_contrib (wordnet-blast-cmake wordnet-blast)
|
add_contrib (wordnet-blast-cmake wordnet-blast)
|
||||||
add_contrib (lemmagen-c-cmake lemmagen-c)
|
add_contrib (lemmagen-c-cmake lemmagen-c)
|
||||||
add_contrib (nlp-data-cmake nlp-data)
|
|
||||||
add_contrib (cld2-cmake cld2)
|
add_contrib (cld2-cmake cld2)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
|
||||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
|
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
|
||||||
|
|
||||||
set (SRCS
|
set (SRCS
|
||||||
@ -23,12 +22,10 @@ if (OS_FREEBSD)
|
|||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
# Related to time_zones table:
|
# Related to time_zones table:
|
||||||
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
|
# TimeZones.generated.cpp is autogenerated each time during a build
|
||||||
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
|
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
|
||||||
# as the library that's built using embedded tzdata is also specific to OS_LINUX
|
|
||||||
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
|
|
||||||
# Remove any existing copy so that it is generated fresh on each build.
|
# Remove any existing copy so that it is generated fresh on each build.
|
||||||
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
|
file(REMOVE ${TIMEZONES_FILE})
|
||||||
|
|
||||||
# get the list of timezones from tzdata shipped with cctz
|
# get the list of timezones from tzdata shipped with cctz
|
||||||
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
|
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
|
||||||
@ -36,28 +33,45 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
|
|||||||
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
|
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
|
||||||
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
|
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
|
||||||
|
|
||||||
set(TIMEZONE_RESOURCE_FILES)
|
|
||||||
|
|
||||||
# Each file in that dir (except the *.tab files and localtime) stores the info about one timezone.
|
# Each file in that dir (except the *.tab files and localtime) stores the info about one timezone.
|
||||||
execute_process(COMMAND
|
execute_process(COMMAND
|
||||||
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
|
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
|
||||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||||
OUTPUT_VARIABLE TIMEZONES)
|
OUTPUT_VARIABLE TIMEZONES)
|
||||||
|
|
||||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
|
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
|
||||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
|
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
|
||||||
|
|
||||||
|
set (COUNTER 1)
|
||||||
|
foreach(TIMEZONE ${TIMEZONES})
|
||||||
|
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n")
|
||||||
|
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||||
|
endforeach(TIMEZONE)
|
||||||
|
|
||||||
|
file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
|
||||||
|
|
||||||
foreach(TIMEZONE ${TIMEZONES})
|
foreach(TIMEZONE ${TIMEZONES})
|
||||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
|
file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
|
||||||
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
|
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||||
endforeach(TIMEZONE)
|
endforeach(TIMEZONE)
|
||||||
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
|
|
||||||
clickhouse_embed_binaries(
|
file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
|
||||||
TARGET tzdata
|
|
||||||
RESOURCE_DIR "${TZDIR}"
|
file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
|
||||||
RESOURCES ${TIMEZONE_RESOURCE_FILES}
|
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
|
||||||
)
|
|
||||||
add_dependencies(_cctz tzdata)
|
set (COUNTER 1)
|
||||||
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
|
foreach(TIMEZONE ${TIMEZONES})
|
||||||
|
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
|
||||||
|
MATH(EXPR COUNTER "${COUNTER}+1")
|
||||||
|
endforeach(TIMEZONE)
|
||||||
|
|
||||||
|
file(APPEND ${TIMEZONES_FILE} " return {};\n")
|
||||||
|
file(APPEND ${TIMEZONES_FILE} "}\n")
|
||||||
|
|
||||||
|
add_library (tzdata ${TIMEZONES_FILE})
|
||||||
|
target_link_libraries(tzdata ch_contrib::incbin)
|
||||||
|
target_include_directories(tzdata PRIVATE ${TZDIR})
|
||||||
|
target_link_libraries(_cctz tzdata)
|
||||||
|
|
||||||
add_library(ch_contrib::cctz ALIAS _cctz)
|
add_library(ch_contrib::cctz ALIAS _cctz)
|
||||||
|
1
contrib/incbin
vendored
Submodule
1
contrib/incbin
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
|
4
contrib/incbin-cmake/CMakeLists.txt
Normal file
4
contrib/incbin-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
|
||||||
|
add_library(_incbin INTERFACE)
|
||||||
|
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
|
||||||
|
add_library(ch_contrib::incbin ALIAS _incbin)
|
@ -1,15 +0,0 @@
|
|||||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
|
||||||
|
|
||||||
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
|
|
||||||
|
|
||||||
add_library (_nlp_data INTERFACE)
|
|
||||||
|
|
||||||
clickhouse_embed_binaries(
|
|
||||||
TARGET nlp_dictionaries
|
|
||||||
RESOURCE_DIR "${LIBRARY_DIR}"
|
|
||||||
RESOURCES charset.zst tonality_ru.zst programming.zst
|
|
||||||
)
|
|
||||||
|
|
||||||
add_dependencies(_nlp_data nlp_dictionaries)
|
|
||||||
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
|
|
||||||
add_library(ch_contrib::nlp_data ALIAS _nlp_data)
|
|
@ -147,6 +147,7 @@ function clone_submodules
|
|||||||
contrib/simdjson
|
contrib/simdjson
|
||||||
contrib/liburing
|
contrib/liburing
|
||||||
contrib/libfiu
|
contrib/libfiu
|
||||||
|
contrib/incbin
|
||||||
)
|
)
|
||||||
|
|
||||||
git submodule sync
|
git submodule sync
|
||||||
|
@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK
|
|||||||
)
|
)
|
||||||
|
|
||||||
clickhouse_program_add_library(install)
|
clickhouse_program_add_library(install)
|
||||||
|
|
||||||
|
# For incbin
|
||||||
|
target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")
|
||||||
|
@ -20,10 +20,7 @@
|
|||||||
#include <Common/formatReadable.h>
|
#include <Common/formatReadable.h>
|
||||||
#include <Common/Config/ConfigProcessor.h>
|
#include <Common/Config/ConfigProcessor.h>
|
||||||
#include <Common/OpenSSLHelpers.h>
|
#include <Common/OpenSSLHelpers.h>
|
||||||
#include <base/hex.h>
|
|
||||||
#include <Common/getResource.h>
|
|
||||||
#include <base/sleep.h>
|
#include <base/sleep.h>
|
||||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
|
||||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <IO/ReadBufferFromFile.h>
|
||||||
#include <IO/WriteBufferFromFile.h>
|
#include <IO/WriteBufferFromFile.h>
|
||||||
@ -35,6 +32,12 @@
|
|||||||
|
|
||||||
#include <Poco/Util/XMLConfiguration.h>
|
#include <Poco/Util/XMLConfiguration.h>
|
||||||
|
|
||||||
|
#include <incbin.h>
|
||||||
|
|
||||||
|
/// Embedded configuration files used inside the install program
|
||||||
|
INCBIN(resource_config_xml, "config.xml");
|
||||||
|
INCBIN(resource_users_xml, "users.xml");
|
||||||
|
|
||||||
|
|
||||||
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
|
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
|
||||||
* It also allows to avoid dependency on systemd, upstart, SysV init.
|
* It also allows to avoid dependency on systemd, upstart, SysV init.
|
||||||
@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
|||||||
|
|
||||||
if (!fs::exists(main_config_file))
|
if (!fs::exists(main_config_file))
|
||||||
{
|
{
|
||||||
std::string_view main_config_content = getResource("config.xml");
|
std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
|
||||||
if (main_config_content.empty())
|
if (main_config_content.empty())
|
||||||
{
|
{
|
||||||
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
|
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
|
||||||
@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
|||||||
|
|
||||||
if (!fs::exists(users_config_file))
|
if (!fs::exists(users_config_file))
|
||||||
{
|
{
|
||||||
std::string_view users_config_content = getResource("users.xml");
|
std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
|
||||||
if (users_config_content.empty())
|
if (users_config_content.empty())
|
||||||
{
|
{
|
||||||
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
|
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
|
||||||
|
@ -1,16 +1,3 @@
|
|||||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
|
||||||
|
|
||||||
if (OS_LINUX)
|
|
||||||
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
|
||||||
# for some reason INTERFACE linkage doesn't work for standalone binary
|
|
||||||
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
clickhouse_embed_binaries(
|
|
||||||
TARGET clickhouse_keeper_configs
|
|
||||||
RESOURCES keeper_config.xml keeper_embedded.xml
|
|
||||||
)
|
|
||||||
|
|
||||||
set(CLICKHOUSE_KEEPER_SOURCES
|
set(CLICKHOUSE_KEEPER_SOURCES
|
||||||
Keeper.cpp
|
Keeper.cpp
|
||||||
)
|
)
|
||||||
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
|
|||||||
clickhouse_program_add(keeper)
|
clickhouse_program_add(keeper)
|
||||||
|
|
||||||
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
|
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
|
||||||
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
|
|
||||||
|
|
||||||
if (BUILD_STANDALONE_KEEPER)
|
if (BUILD_STANDALONE_KEEPER)
|
||||||
# Straight list of all required sources
|
# Straight list of all required sources
|
||||||
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
|
|||||||
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
|
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
|
||||||
)
|
)
|
||||||
|
|
||||||
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
|
|
||||||
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
||||||
|
|
||||||
if (SPLIT_DEBUG_SYMBOLS)
|
if (SPLIT_DEBUG_SYMBOLS)
|
||||||
|
@ -457,8 +457,10 @@ try
|
|||||||
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
||||||
|
|
||||||
std::vector<std::string> extra_paths = {include_from_path};
|
std::vector<std::string> extra_paths = {include_from_path};
|
||||||
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
|
if (!cert_path.empty())
|
||||||
if (!key_path.empty()) extra_paths.emplace_back(key_path);
|
extra_paths.emplace_back(cert_path);
|
||||||
|
if (!key_path.empty())
|
||||||
|
extra_paths.emplace_back(key_path);
|
||||||
|
|
||||||
/// ConfigReloader have to strict parameters which are redundant in our case
|
/// ConfigReloader have to strict parameters which are redundant in our case
|
||||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||||
|
@ -1,12 +1,8 @@
|
|||||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
|
||||||
|
|
||||||
set(CLICKHOUSE_SERVER_SOURCES
|
set(CLICKHOUSE_SERVER_SOURCES
|
||||||
MetricsTransmitter.cpp
|
MetricsTransmitter.cpp
|
||||||
Server.cpp
|
Server.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
|
|
||||||
|
|
||||||
set (CLICKHOUSE_SERVER_LINK
|
set (CLICKHOUSE_SERVER_LINK
|
||||||
PRIVATE
|
PRIVATE
|
||||||
clickhouse_aggregate_functions
|
clickhouse_aggregate_functions
|
||||||
@ -33,10 +29,4 @@ endif()
|
|||||||
|
|
||||||
clickhouse_program_add(server)
|
clickhouse_program_add(server)
|
||||||
|
|
||||||
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
|
target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
clickhouse_embed_binaries(
|
|
||||||
TARGET clickhouse_server_configs
|
|
||||||
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
|
|
||||||
)
|
|
||||||
add_dependencies(clickhouse-server-lib clickhouse_server_configs)
|
|
||||||
|
@ -128,6 +128,10 @@
|
|||||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <incbin.h>
|
||||||
|
/// A minimal file used when the server is run without installation
|
||||||
|
INCBIN(resource_embedded_xml, "embedded.xml");
|
||||||
|
|
||||||
namespace CurrentMetrics
|
namespace CurrentMetrics
|
||||||
{
|
{
|
||||||
extern const Metric Revision;
|
extern const Metric Revision;
|
||||||
@ -393,6 +397,7 @@ int Server::run()
|
|||||||
|
|
||||||
void Server::initialize(Poco::Util::Application & self)
|
void Server::initialize(Poco::Util::Application & self)
|
||||||
{
|
{
|
||||||
|
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
|
||||||
BaseDaemon::initialize(self);
|
BaseDaemon::initialize(self);
|
||||||
logger().information("starting up");
|
logger().information("starting up");
|
||||||
|
|
||||||
@ -1105,8 +1110,10 @@ try
|
|||||||
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
|
||||||
|
|
||||||
std::vector<std::string> extra_paths = {include_from_path};
|
std::vector<std::string> extra_paths = {include_from_path};
|
||||||
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
|
if (!cert_path.empty())
|
||||||
if (!key_path.empty()) extra_paths.emplace_back(key_path);
|
extra_paths.emplace_back(cert_path);
|
||||||
|
if (!key_path.empty())
|
||||||
|
extra_paths.emplace_back(key_path);
|
||||||
|
|
||||||
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
auto main_config_reloader = std::make_unique<ConfigReloader>(
|
||||||
config_path,
|
config_path,
|
||||||
|
0
programs/server/resources.cpp
Normal file
0
programs/server/resources.cpp
Normal file
@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
|
|||||||
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
|
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
|
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
|
||||||
|
|
||||||
add_subdirectory(Access/Common)
|
add_subdirectory(Access/Common)
|
||||||
add_subdirectory(Common/ZooKeeper)
|
add_subdirectory(Common/ZooKeeper)
|
||||||
@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
|
|||||||
endforeach ()
|
endforeach ()
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
||||||
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
|
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
|
||||||
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
|
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
|
||||||
|
|
||||||
if (TARGET ch_contrib::llvm)
|
if (TARGET ch_contrib::llvm)
|
||||||
@ -561,7 +561,7 @@ if (ENABLE_NLP)
|
|||||||
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
|
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
|
||||||
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
|
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
|
||||||
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
|
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
|
||||||
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
|
target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (TARGET ch_contrib::ulid)
|
if (TARGET ch_contrib::ulid)
|
||||||
|
@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ENABLE_MYSQL)
|
if (ENABLE_MYSQL)
|
||||||
add_subdirectory (mysqlxx)
|
add_subdirectory(mysqlxx)
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -19,7 +19,6 @@
|
|||||||
#include <Common/ZooKeeper/KeeperException.h>
|
#include <Common/ZooKeeper/KeeperException.h>
|
||||||
#include <Common/StringUtils/StringUtils.h>
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
#include <Common/Exception.h>
|
#include <Common/Exception.h>
|
||||||
#include <Common/getResource.h>
|
|
||||||
#include <Common/XMLUtils.h>
|
#include <Common/XMLUtils.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
#include <base/errnoToString.h>
|
#include <base/errnoToString.h>
|
||||||
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
|
|||||||
Poco::Logger::destroy("ConfigProcessor");
|
Poco::Logger::destroy("ConfigProcessor");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::unordered_map<std::string, std::string_view> embedded_configs;
|
||||||
|
|
||||||
|
void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
|
||||||
|
{
|
||||||
|
embedded_configs[name] = content;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Vector containing the name of the element and a sorted list of attribute names and values
|
/// Vector containing the name of the element and a sorted list of attribute names and values
|
||||||
/// (except "remove" and "replace" attributes).
|
/// (except "remove" and "replace" attributes).
|
||||||
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
|
|||||||
{
|
{
|
||||||
std::string value = node->nodeValue();
|
std::string value = node->nodeValue();
|
||||||
|
|
||||||
bool replace_occured = false;
|
bool replace_occurred = false;
|
||||||
size_t pos;
|
size_t pos;
|
||||||
while ((pos = value.find(substitution.first)) != std::string::npos)
|
while ((pos = value.find(substitution.first)) != std::string::npos)
|
||||||
{
|
{
|
||||||
value.replace(pos, substitution.first.length(), substitution.second);
|
value.replace(pos, substitution.first.length(), substitution.second);
|
||||||
replace_occured = true;
|
replace_occurred = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (replace_occured)
|
if (replace_occurred)
|
||||||
node->setNodeValue(value);
|
node->setNodeValue(value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/// These embedded files added during build with some cmake magic.
|
/// When we can use a config embedded in the binary.
|
||||||
/// Look at the end of programs/server/CMakeLists.txt.
|
if (auto it = embedded_configs.find(path); it != embedded_configs.end())
|
||||||
std::string embedded_name;
|
|
||||||
if (path == "config.xml")
|
|
||||||
embedded_name = "embedded.xml";
|
|
||||||
|
|
||||||
if (path == "keeper_config.xml")
|
|
||||||
embedded_name = "keeper_embedded.xml";
|
|
||||||
|
|
||||||
/// When we can use config embedded in binary.
|
|
||||||
if (!embedded_name.empty())
|
|
||||||
{
|
{
|
||||||
auto resource = getResource(embedded_name);
|
|
||||||
if (resource.empty())
|
|
||||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
|
|
||||||
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
|
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
|
||||||
config = dom_parser.parseMemory(resource.data(), resource.size());
|
config = dom_parser.parseMemory(it->second.data(), it->second.size());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
|
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::string> contributing_files;
|
std::vector<std::string> contributing_files;
|
||||||
|
@ -65,6 +65,9 @@ public:
|
|||||||
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
|
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
|
||||||
const zkutil::EventPtr & zk_changed_event = nullptr);
|
const zkutil::EventPtr & zk_changed_event = nullptr);
|
||||||
|
|
||||||
|
/// These configurations will be used if there is no configuration file.
|
||||||
|
static void registerEmbeddedConfig(std::string name, std::string_view content);
|
||||||
|
|
||||||
|
|
||||||
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
|
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
|
||||||
/// The resulting XML document is saved into a file with the name
|
/// The resulting XML document is saved into a file with the name
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
#include <cctz/civil_time.h>
|
#include <cctz/civil_time.h>
|
||||||
#include <cctz/time_zone.h>
|
#include <cctz/time_zone.h>
|
||||||
#include <cctz/zone_info_source.h>
|
#include <cctz/zone_info_source.h>
|
||||||
#include <Common/getResource.h>
|
|
||||||
#include <Poco/Exception.h>
|
#include <Poco/Exception.h>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@ -11,6 +10,11 @@
|
|||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
|
||||||
|
/// Embedded timezones.
|
||||||
|
std::string_view getTimeZone(const char * name);
|
||||||
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -249,9 +253,10 @@ namespace cctz_extension
|
|||||||
const std::string & name,
|
const std::string & name,
|
||||||
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
|
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
|
||||||
{
|
{
|
||||||
std::string_view resource = getResource(name);
|
std::string_view tz_file = getTimeZone(name.data());
|
||||||
if (!resource.empty())
|
|
||||||
return std::make_unique<Source>(resource.data(), resource.size());
|
if (!tz_file.empty())
|
||||||
|
return std::make_unique<Source>(tz_file.data(), tz_file.size());
|
||||||
|
|
||||||
return fallback(name);
|
return fallback(name);
|
||||||
}
|
}
|
||||||
|
185
src/Common/FrequencyHolder.cpp
Normal file
185
src/Common/FrequencyHolder.cpp
Normal file
@ -0,0 +1,185 @@
|
|||||||
|
#include <Common/FrequencyHolder.h>
|
||||||
|
|
||||||
|
#if USE_NLP
|
||||||
|
|
||||||
|
#include <incbin.h>
|
||||||
|
|
||||||
|
/// Embedded NLP dictionaries (zstd-compressed)
|
||||||
|
INCBIN(resource_charset_zst, "charset.zst");
|
||||||
|
INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
|
||||||
|
INCBIN(resource_programming_zst, "programming.zst");
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int FILE_DOESNT_EXIST;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
FrequencyHolder & FrequencyHolder::getInstance()
|
||||||
|
{
|
||||||
|
static FrequencyHolder instance;
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
FrequencyHolder::FrequencyHolder()
|
||||||
|
{
|
||||||
|
loadEmotionalDict();
|
||||||
|
loadEncodingsFrequency();
|
||||||
|
loadProgrammingFrequency();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FrequencyHolder::loadEncodingsFrequency()
|
||||||
|
{
|
||||||
|
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
|
||||||
|
|
||||||
|
LOG_TRACE(log, "Loading embedded charset frequencies");
|
||||||
|
|
||||||
|
std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
|
||||||
|
if (resource.empty())
|
||||||
|
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
|
||||||
|
|
||||||
|
String line;
|
||||||
|
UInt16 bigram;
|
||||||
|
Float64 frequency;
|
||||||
|
String charset_name;
|
||||||
|
|
||||||
|
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||||
|
ZstdInflatingReadBuffer in(std::move(buf));
|
||||||
|
|
||||||
|
while (!in.eof())
|
||||||
|
{
|
||||||
|
readString(line, in);
|
||||||
|
in.ignore();
|
||||||
|
|
||||||
|
if (line.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ReadBufferFromString buf_line(line);
|
||||||
|
|
||||||
|
// Start loading a new charset
|
||||||
|
if (line.starts_with("// "))
|
||||||
|
{
|
||||||
|
// Skip "// "
|
||||||
|
buf_line.ignore(3);
|
||||||
|
readString(charset_name, buf_line);
|
||||||
|
|
||||||
|
/* In our dictionary we have lines with form: <Language>_<Charset>
|
||||||
|
* If we need to find language of data, we return <Language>
|
||||||
|
* If we need to find charset of data, we return <Charset>.
|
||||||
|
*/
|
||||||
|
size_t sep = charset_name.find('_');
|
||||||
|
|
||||||
|
Encoding enc;
|
||||||
|
enc.lang = charset_name.substr(0, sep);
|
||||||
|
enc.name = charset_name.substr(sep + 1);
|
||||||
|
encodings_freq.push_back(std::move(enc));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
readIntText(bigram, buf_line);
|
||||||
|
buf_line.ignore();
|
||||||
|
readFloatText(frequency, buf_line);
|
||||||
|
|
||||||
|
encodings_freq.back().map[bigram] = frequency;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
void FrequencyHolder::loadEmotionalDict()
|
||||||
|
{
|
||||||
|
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
|
||||||
|
LOG_TRACE(log, "Loading embedded emotional dictionary");
|
||||||
|
|
||||||
|
std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
|
||||||
|
if (resource.empty())
|
||||||
|
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
|
||||||
|
|
||||||
|
String line;
|
||||||
|
String word;
|
||||||
|
Float64 tonality;
|
||||||
|
size_t count = 0;
|
||||||
|
|
||||||
|
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||||
|
ZstdInflatingReadBuffer in(std::move(buf));
|
||||||
|
|
||||||
|
while (!in.eof())
|
||||||
|
{
|
||||||
|
readString(line, in);
|
||||||
|
in.ignore();
|
||||||
|
|
||||||
|
if (line.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ReadBufferFromString buf_line(line);
|
||||||
|
|
||||||
|
readStringUntilWhitespace(word, buf_line);
|
||||||
|
buf_line.ignore();
|
||||||
|
readFloatText(tonality, buf_line);
|
||||||
|
|
||||||
|
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
|
||||||
|
emotional_dict[ref] = tonality;
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
|
||||||
|
}
|
||||||
|
|
||||||
|
void FrequencyHolder::loadProgrammingFrequency()
|
||||||
|
{
|
||||||
|
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
|
||||||
|
|
||||||
|
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
|
||||||
|
|
||||||
|
std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
|
||||||
|
if (resource.empty())
|
||||||
|
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
|
||||||
|
|
||||||
|
String line;
|
||||||
|
String bigram;
|
||||||
|
Float64 frequency;
|
||||||
|
String programming_language;
|
||||||
|
|
||||||
|
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
||||||
|
ZstdInflatingReadBuffer in(std::move(buf));
|
||||||
|
|
||||||
|
while (!in.eof())
|
||||||
|
{
|
||||||
|
readString(line, in);
|
||||||
|
in.ignore();
|
||||||
|
|
||||||
|
if (line.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ReadBufferFromString buf_line(line);
|
||||||
|
|
||||||
|
// Start loading a new language
|
||||||
|
if (line.starts_with("// "))
|
||||||
|
{
|
||||||
|
// Skip "// "
|
||||||
|
buf_line.ignore(3);
|
||||||
|
readString(programming_language, buf_line);
|
||||||
|
|
||||||
|
Language lang;
|
||||||
|
lang.name = programming_language;
|
||||||
|
programming_freq.push_back(std::move(lang));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
readStringUntilWhitespace(bigram, buf_line);
|
||||||
|
buf_line.ignore();
|
||||||
|
readFloatText(frequency, buf_line);
|
||||||
|
|
||||||
|
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
|
||||||
|
programming_freq.back().map[ref] = frequency;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG_TRACE(log, "Programming languages frequencies was added");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -1,5 +1,9 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#if USE_NLP
|
||||||
|
|
||||||
#include <base/StringRef.h>
|
#include <base/StringRef.h>
|
||||||
#include <Common/logger_useful.h>
|
#include <Common/logger_useful.h>
|
||||||
|
|
||||||
@ -7,7 +11,6 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <Common/Arena.h>
|
#include <Common/Arena.h>
|
||||||
#include <Common/getResource.h>
|
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
#include <Common/StringUtils/StringUtils.h>
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <IO/ReadBufferFromFile.h>
|
||||||
@ -20,11 +23,6 @@
|
|||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
namespace ErrorCodes
|
|
||||||
{
|
|
||||||
extern const int FILE_DOESNT_EXIST;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// FrequencyHolder class is responsible for storing and loading dictionaries
|
/// FrequencyHolder class is responsible for storing and loading dictionaries
|
||||||
/// needed for text classification functions:
|
/// needed for text classification functions:
|
||||||
///
|
///
|
||||||
@ -56,11 +54,7 @@ public:
|
|||||||
using EncodingMap = HashMap<UInt16, Float64>;
|
using EncodingMap = HashMap<UInt16, Float64>;
|
||||||
using EncodingContainer = std::vector<Encoding>;
|
using EncodingContainer = std::vector<Encoding>;
|
||||||
|
|
||||||
static FrequencyHolder & getInstance()
|
static FrequencyHolder & getInstance();
|
||||||
{
|
|
||||||
static FrequencyHolder instance;
|
|
||||||
return instance;
|
|
||||||
}
|
|
||||||
|
|
||||||
const Map & getEmotionalDict() const
|
const Map & getEmotionalDict() const
|
||||||
{
|
{
|
||||||
@ -78,161 +72,11 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
FrequencyHolder();
|
||||||
|
|
||||||
FrequencyHolder()
|
void loadEncodingsFrequency();
|
||||||
{
|
void loadEmotionalDict();
|
||||||
loadEmotionalDict();
|
void loadProgrammingFrequency();
|
||||||
loadEncodingsFrequency();
|
|
||||||
loadProgrammingFrequency();
|
|
||||||
}
|
|
||||||
|
|
||||||
void loadEncodingsFrequency()
|
|
||||||
{
|
|
||||||
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
|
|
||||||
|
|
||||||
LOG_TRACE(log, "Loading embedded charset frequencies");
|
|
||||||
|
|
||||||
auto resource = getResource("charset.zst");
|
|
||||||
if (resource.empty())
|
|
||||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
|
|
||||||
|
|
||||||
String line;
|
|
||||||
UInt16 bigram;
|
|
||||||
Float64 frequency;
|
|
||||||
String charset_name;
|
|
||||||
|
|
||||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
|
||||||
ZstdInflatingReadBuffer in(std::move(buf));
|
|
||||||
|
|
||||||
while (!in.eof())
|
|
||||||
{
|
|
||||||
readString(line, in);
|
|
||||||
in.ignore();
|
|
||||||
|
|
||||||
if (line.empty())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ReadBufferFromString buf_line(line);
|
|
||||||
|
|
||||||
// Start loading a new charset
|
|
||||||
if (line.starts_with("// "))
|
|
||||||
{
|
|
||||||
// Skip "// "
|
|
||||||
buf_line.ignore(3);
|
|
||||||
readString(charset_name, buf_line);
|
|
||||||
|
|
||||||
/* In our dictionary we have lines with form: <Language>_<Charset>
|
|
||||||
* If we need to find language of data, we return <Language>
|
|
||||||
* If we need to find charset of data, we return <Charset>.
|
|
||||||
*/
|
|
||||||
size_t sep = charset_name.find('_');
|
|
||||||
|
|
||||||
Encoding enc;
|
|
||||||
enc.lang = charset_name.substr(0, sep);
|
|
||||||
enc.name = charset_name.substr(sep + 1);
|
|
||||||
encodings_freq.push_back(std::move(enc));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
readIntText(bigram, buf_line);
|
|
||||||
buf_line.ignore();
|
|
||||||
readFloatText(frequency, buf_line);
|
|
||||||
|
|
||||||
encodings_freq.back().map[bigram] = frequency;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
void loadEmotionalDict()
|
|
||||||
{
|
|
||||||
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
|
|
||||||
LOG_TRACE(log, "Loading embedded emotional dictionary");
|
|
||||||
|
|
||||||
auto resource = getResource("tonality_ru.zst");
|
|
||||||
if (resource.empty())
|
|
||||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
|
|
||||||
|
|
||||||
String line;
|
|
||||||
String word;
|
|
||||||
Float64 tonality;
|
|
||||||
size_t count = 0;
|
|
||||||
|
|
||||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
|
||||||
ZstdInflatingReadBuffer in(std::move(buf));
|
|
||||||
|
|
||||||
while (!in.eof())
|
|
||||||
{
|
|
||||||
readString(line, in);
|
|
||||||
in.ignore();
|
|
||||||
|
|
||||||
if (line.empty())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ReadBufferFromString buf_line(line);
|
|
||||||
|
|
||||||
readStringUntilWhitespace(word, buf_line);
|
|
||||||
buf_line.ignore();
|
|
||||||
readFloatText(tonality, buf_line);
|
|
||||||
|
|
||||||
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
|
|
||||||
emotional_dict[ref] = tonality;
|
|
||||||
++count;
|
|
||||||
}
|
|
||||||
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
|
|
||||||
}
|
|
||||||
|
|
||||||
void loadProgrammingFrequency()
|
|
||||||
{
|
|
||||||
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
|
|
||||||
|
|
||||||
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
|
|
||||||
|
|
||||||
auto resource = getResource("programming.zst");
|
|
||||||
if (resource.empty())
|
|
||||||
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
|
|
||||||
|
|
||||||
String line;
|
|
||||||
String bigram;
|
|
||||||
Float64 frequency;
|
|
||||||
String programming_language;
|
|
||||||
|
|
||||||
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
|
|
||||||
ZstdInflatingReadBuffer in(std::move(buf));
|
|
||||||
|
|
||||||
while (!in.eof())
|
|
||||||
{
|
|
||||||
readString(line, in);
|
|
||||||
in.ignore();
|
|
||||||
|
|
||||||
if (line.empty())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ReadBufferFromString buf_line(line);
|
|
||||||
|
|
||||||
// Start loading a new language
|
|
||||||
if (line.starts_with("// "))
|
|
||||||
{
|
|
||||||
// Skip "// "
|
|
||||||
buf_line.ignore(3);
|
|
||||||
readString(programming_language, buf_line);
|
|
||||||
|
|
||||||
Language lang;
|
|
||||||
lang.name = programming_language;
|
|
||||||
programming_freq.push_back(std::move(lang));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
readStringUntilWhitespace(bigram, buf_line);
|
|
||||||
buf_line.ignore();
|
|
||||||
readFloatText(frequency, buf_line);
|
|
||||||
|
|
||||||
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
|
|
||||||
programming_freq.back().map[ref] = frequency;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
LOG_TRACE(log, "Programming languages frequencies was added");
|
|
||||||
}
|
|
||||||
|
|
||||||
Arena string_pool;
|
Arena string_pool;
|
||||||
|
|
||||||
@ -241,3 +85,5 @@ private:
|
|||||||
EncodingContainer encodings_freq;
|
EncodingContainer encodings_freq;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -88,50 +88,13 @@ namespace
|
|||||||
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
|
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
|
||||||
|
|
||||||
|
|
||||||
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
|
|
||||||
{
|
|
||||||
const char * char_address = static_cast<const char *>(address);
|
|
||||||
|
|
||||||
if (name.starts_with("_binary_") || name.starts_with("binary_"))
|
|
||||||
{
|
|
||||||
if (name.ends_with("_start"))
|
|
||||||
{
|
|
||||||
name = name.substr((name[0] == '_') + strlen("binary_"));
|
|
||||||
name = name.substr(0, name.size() - strlen("_start"));
|
|
||||||
|
|
||||||
auto & resource = resources[name];
|
|
||||||
if (!resource.base_address || resource.base_address == base_address)
|
|
||||||
{
|
|
||||||
resource.base_address = base_address;
|
|
||||||
resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
|
|
||||||
resource.object_name = object_name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (name.ends_with("_end"))
|
|
||||||
{
|
|
||||||
name = name.substr((name[0] == '_') + strlen("binary_"));
|
|
||||||
name = name.substr(0, name.size() - strlen("_end"));
|
|
||||||
|
|
||||||
auto & resource = resources[name];
|
|
||||||
if (!resource.base_address || resource.base_address == base_address)
|
|
||||||
{
|
|
||||||
resource.base_address = base_address;
|
|
||||||
resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
|
|
||||||
resource.object_name = object_name;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Based on the code of musl-libc and the answer of Kanalpiroge on
|
/// Based on the code of musl-libc and the answer of Kanalpiroge on
|
||||||
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
|
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
|
||||||
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
|
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
|
||||||
/// but will work if we cannot find or parse ELF files.
|
/// but will work if we cannot find or parse ELF files.
|
||||||
void collectSymbolsFromProgramHeaders(
|
void collectSymbolsFromProgramHeaders(
|
||||||
dl_phdr_info * info,
|
dl_phdr_info * info,
|
||||||
std::vector<SymbolIndex::Symbol> & symbols,
|
std::vector<SymbolIndex::Symbol> & symbols)
|
||||||
SymbolIndex::Resources & resources)
|
|
||||||
{
|
{
|
||||||
/* Iterate over all headers of the current shared lib
|
/* Iterate over all headers of the current shared lib
|
||||||
* (first call is for the executable itself)
|
* (first call is for the executable itself)
|
||||||
@ -265,9 +228,6 @@ void collectSymbolsFromProgramHeaders(
|
|||||||
/// We are not interested in empty symbols.
|
/// We are not interested in empty symbols.
|
||||||
if (elf_sym[sym_index].st_size)
|
if (elf_sym[sym_index].st_size)
|
||||||
symbols.push_back(symbol);
|
symbols.push_back(symbol);
|
||||||
|
|
||||||
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
|
|
||||||
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
@ -303,8 +263,7 @@ void collectSymbolsFromELFSymbolTable(
|
|||||||
const Elf & elf,
|
const Elf & elf,
|
||||||
const Elf::Section & symbol_table,
|
const Elf::Section & symbol_table,
|
||||||
const Elf::Section & string_table,
|
const Elf::Section & string_table,
|
||||||
std::vector<SymbolIndex::Symbol> & symbols,
|
std::vector<SymbolIndex::Symbol> & symbols)
|
||||||
SymbolIndex::Resources & resources)
|
|
||||||
{
|
{
|
||||||
/// Iterate symbol table.
|
/// Iterate symbol table.
|
||||||
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
|
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
|
||||||
@ -334,8 +293,6 @@ void collectSymbolsFromELFSymbolTable(
|
|||||||
|
|
||||||
if (symbol_table_entry->st_size)
|
if (symbol_table_entry->st_size)
|
||||||
symbols.push_back(symbol);
|
symbols.push_back(symbol);
|
||||||
|
|
||||||
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -345,8 +302,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
|
|||||||
const Elf & elf,
|
const Elf & elf,
|
||||||
unsigned section_header_type,
|
unsigned section_header_type,
|
||||||
const char * string_table_name,
|
const char * string_table_name,
|
||||||
std::vector<SymbolIndex::Symbol> & symbols,
|
std::vector<SymbolIndex::Symbol> & symbols)
|
||||||
SymbolIndex::Resources & resources)
|
|
||||||
{
|
{
|
||||||
std::optional<Elf::Section> symbol_table;
|
std::optional<Elf::Section> symbol_table;
|
||||||
std::optional<Elf::Section> string_table;
|
std::optional<Elf::Section> string_table;
|
||||||
@ -364,7 +320,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
|
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -373,7 +329,6 @@ void collectSymbolsFromELF(
|
|||||||
dl_phdr_info * info,
|
dl_phdr_info * info,
|
||||||
std::vector<SymbolIndex::Symbol> & symbols,
|
std::vector<SymbolIndex::Symbol> & symbols,
|
||||||
std::vector<SymbolIndex::Object> & objects,
|
std::vector<SymbolIndex::Object> & objects,
|
||||||
SymbolIndex::Resources & resources,
|
|
||||||
String & build_id)
|
String & build_id)
|
||||||
{
|
{
|
||||||
String object_name;
|
String object_name;
|
||||||
@ -485,11 +440,11 @@ void collectSymbolsFromELF(
|
|||||||
object.name = object_name;
|
object.name = object_name;
|
||||||
objects.push_back(std::move(object));
|
objects.push_back(std::move(object));
|
||||||
|
|
||||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
|
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
|
||||||
|
|
||||||
/// Unneeded if they were parsed from "program headers" of loaded objects.
|
/// Unneeded if they were parsed from "program headers" of loaded objects.
|
||||||
#if defined USE_MUSL
|
#if defined USE_MUSL
|
||||||
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
|
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -502,8 +457,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
|
|||||||
{
|
{
|
||||||
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
|
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
|
||||||
|
|
||||||
collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
|
collectSymbolsFromProgramHeaders(info, data.symbols);
|
||||||
collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
|
collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
|
||||||
|
|
||||||
/* Continue iterations */
|
/* Continue iterations */
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <Common/Elf.h>
|
#include <Common/Elf.h>
|
||||||
#include <boost/noncopyable.hpp>
|
#include <boost/noncopyable.hpp>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -45,44 +46,15 @@ public:
|
|||||||
const std::vector<Symbol> & symbols() const { return data.symbols; }
|
const std::vector<Symbol> & symbols() const { return data.symbols; }
|
||||||
const std::vector<Object> & objects() const { return data.objects; }
|
const std::vector<Object> & objects() const { return data.objects; }
|
||||||
|
|
||||||
std::string_view getResource(String name) const
|
|
||||||
{
|
|
||||||
if (auto it = data.resources.find(name); it != data.resources.end())
|
|
||||||
return it->second.data();
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The BuildID that is generated by compiler.
|
/// The BuildID that is generated by compiler.
|
||||||
String getBuildID() const { return data.build_id; }
|
String getBuildID() const { return data.build_id; }
|
||||||
String getBuildIDHex() const;
|
String getBuildIDHex() const;
|
||||||
|
|
||||||
struct ResourcesBlob
|
|
||||||
{
|
|
||||||
/// Symbol can be presented in multiple shared objects,
|
|
||||||
/// base_address will be used to compare only symbols from the same SO.
|
|
||||||
ElfW(Addr) base_address = 0;
|
|
||||||
/// Just a human name of the SO.
|
|
||||||
std::string_view object_name;
|
|
||||||
/// Data blob.
|
|
||||||
std::string_view start;
|
|
||||||
std::string_view end;
|
|
||||||
|
|
||||||
std::string_view data() const
|
|
||||||
{
|
|
||||||
assert(end.data() >= start.data());
|
|
||||||
return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
|
|
||||||
}
|
|
||||||
};
|
|
||||||
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
|
|
||||||
|
|
||||||
struct Data
|
struct Data
|
||||||
{
|
{
|
||||||
std::vector<Symbol> symbols;
|
std::vector<Symbol> symbols;
|
||||||
std::vector<Object> objects;
|
std::vector<Object> objects;
|
||||||
String build_id;
|
String build_id;
|
||||||
|
|
||||||
/// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
|
|
||||||
Resources resources;
|
|
||||||
};
|
};
|
||||||
private:
|
private:
|
||||||
Data data;
|
Data data;
|
||||||
|
@ -1,17 +0,0 @@
|
|||||||
#include "getResource.h"
|
|
||||||
#include <boost/algorithm/string/replace.hpp>
|
|
||||||
#include <Common/SymbolIndex.h>
|
|
||||||
|
|
||||||
|
|
||||||
std::string_view getResource(std::string_view name)
|
|
||||||
{
|
|
||||||
// Convert the resource file name into the form generated by `ld -r -b binary`.
|
|
||||||
std::string name_replaced(name);
|
|
||||||
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
|
|
||||||
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
|
|
||||||
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
|
|
||||||
boost::replace_all(name_replaced, "+", "_PLUS_");
|
|
||||||
|
|
||||||
/// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself.
|
|
||||||
return DB::SymbolIndex::instance().getResource(name_replaced);
|
|
||||||
}
|
|
@ -1,7 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <string_view>
|
|
||||||
|
|
||||||
/// Get resource from binary if exists. Otherwise return empty string view.
|
|
||||||
/// Resources are data that is embedded into executable at link time.
|
|
||||||
std::string_view getResource(std::string_view name);
|
|
@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
|
|||||||
// {0, 0 + 11 * 3600 * 24 + 12, 11},
|
// {0, 0 + 11 * 3600 * 24 + 12, 11},
|
||||||
}))
|
}))
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -38,7 +38,6 @@
|
|||||||
#include <base/coverage.h>
|
#include <base/coverage.h>
|
||||||
#include <base/sleep.h>
|
#include <base/sleep.h>
|
||||||
|
|
||||||
#include <IO/WriteBufferFromFile.h>
|
|
||||||
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
|
||||||
#include <IO/ReadBufferFromFileDescriptor.h>
|
#include <IO/ReadBufferFromFileDescriptor.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
|
@ -1,9 +1,12 @@
|
|||||||
#include <Common/FrequencyHolder.h>
|
#include <Common/FrequencyHolder.h>
|
||||||
|
|
||||||
|
#if USE_NLP
|
||||||
|
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionsTextClassification.h>
|
#include <Functions/FunctionsTextClassification.h>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <unordered_map>
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -46,7 +49,7 @@ namespace
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Сount how many times each bigram occurs in the text.
|
/// Count how many times each bigram occurs in the text.
|
||||||
template <typename ModelMap>
|
template <typename ModelMap>
|
||||||
ALWAYS_INLINE inline void calculateStats(
|
ALWAYS_INLINE inline void calculateStats(
|
||||||
const UInt8 * data,
|
const UInt8 * data,
|
||||||
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -5,19 +5,17 @@
|
|||||||
#include <Columns/ColumnMap.h>
|
#include <Columns/ColumnMap.h>
|
||||||
#include <Columns/ColumnArray.h>
|
#include <Columns/ColumnArray.h>
|
||||||
#include <Columns/ColumnString.h>
|
#include <Columns/ColumnString.h>
|
||||||
#include <Columns/ColumnsNumber.h>
|
|
||||||
#include <Common/isValidUTF8.h>
|
#include <Common/isValidUTF8.h>
|
||||||
#include <DataTypes/DataTypeMap.h>
|
#include <DataTypes/DataTypeMap.h>
|
||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <DataTypes/DataTypeTuple.h>
|
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <Functions/FunctionHelpers.h>
|
#include <Functions/FunctionHelpers.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionsTextClassification.h>
|
#include <Functions/FunctionsTextClassification.h>
|
||||||
#include <Interpreters/Context.h>
|
|
||||||
|
|
||||||
#include <compact_lang_det.h>
|
#include <compact_lang_det.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
/* Determine language of Unicode UTF-8 text.
|
/* Determine language of Unicode UTF-8 text.
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
#include <Common/FrequencyHolder.h>
|
#include <Common/FrequencyHolder.h>
|
||||||
|
|
||||||
|
#if USE_NLP
|
||||||
|
|
||||||
#include <Common/StringUtils/StringUtils.h>
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionsTextClassification.h>
|
#include <Functions/FunctionsTextClassification.h>
|
||||||
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
#include <Common/FrequencyHolder.h>
|
#include <Common/FrequencyHolder.h>
|
||||||
|
|
||||||
|
#if USE_NLP
|
||||||
|
|
||||||
#include <Common/StringUtils/StringUtils.h>
|
#include <Common/StringUtils/StringUtils.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <Functions/FunctionsTextClassification.h>
|
#include <Functions/FunctionsTextClassification.h>
|
||||||
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -6,10 +6,16 @@
|
|||||||
#include <Poco/Util/LayeredConfiguration.h>
|
#include <Poco/Util/LayeredConfiguration.h>
|
||||||
|
|
||||||
#include <IO/HTTPCommon.h>
|
#include <IO/HTTPCommon.h>
|
||||||
#include <Common/getResource.h>
|
|
||||||
|
|
||||||
#include <re2/re2.h>
|
#include <re2/re2.h>
|
||||||
|
|
||||||
|
#include <incbin.h>
|
||||||
|
|
||||||
|
/// Embedded HTML pages
|
||||||
|
INCBIN(resource_play_html, "play.html");
|
||||||
|
INCBIN(resource_dashboard_html, "dashboard.html");
|
||||||
|
INCBIN(resource_uplot_js, "js/uplot.js");
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
|
|||||||
if (request.getURI().starts_with("/play"))
|
if (request.getURI().starts_with("/play"))
|
||||||
{
|
{
|
||||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||||
*response.send() << getResource("play.html");
|
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
|
||||||
}
|
}
|
||||||
else if (request.getURI().starts_with("/dashboard"))
|
else if (request.getURI().starts_with("/dashboard"))
|
||||||
{
|
{
|
||||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||||
|
|
||||||
std::string html(getResource("dashboard.html"));
|
std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
|
||||||
|
|
||||||
/// Replace a link to external JavaScript file to embedded file.
|
/// Replace a link to external JavaScript file to embedded file.
|
||||||
/// This allows to open the HTML without running a server and to host it on server.
|
/// This allows to open the HTML without running a server and to host it on server.
|
||||||
@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
|
|||||||
else if (request.getURI() == "/js/uplot.js")
|
else if (request.getURI() == "/js/uplot.js")
|
||||||
{
|
{
|
||||||
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
|
||||||
*response.send() << getResource("js/uplot.js");
|
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -30,7 +30,6 @@ endif()
|
|||||||
add_dependencies(generate-source generate-contributors)
|
add_dependencies(generate-source generate-contributors)
|
||||||
|
|
||||||
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
|
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
|
||||||
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
|
|
||||||
|
|
||||||
add_custom_command(
|
add_custom_command(
|
||||||
OUTPUT StorageSystemLicenses.generated.cpp
|
OUTPUT StorageSystemLicenses.generated.cpp
|
||||||
@ -38,23 +37,13 @@ add_custom_command(
|
|||||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
|
|
||||||
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
|
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
|
||||||
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
|
|
||||||
|
|
||||||
# Overlength strings
|
# Overlength strings
|
||||||
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
|
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
|
||||||
|
|
||||||
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
|
|
||||||
clickhouse_embed_binaries(
|
|
||||||
TARGET information_schema_metadata
|
|
||||||
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
|
|
||||||
RESOURCES schemata.sql tables.sql views.sql columns.sql
|
|
||||||
)
|
|
||||||
|
|
||||||
list (SORT storages_system_sources) # Reproducible build
|
list (SORT storages_system_sources) # Reproducible build
|
||||||
add_library(clickhouse_storages_system ${storages_system_sources})
|
add_library(clickhouse_storages_system ${storages_system_sources})
|
||||||
|
|
||||||
add_dependencies(clickhouse_storages_system information_schema_metadata)
|
|
||||||
|
|
||||||
target_link_libraries(clickhouse_storages_system PRIVATE
|
target_link_libraries(clickhouse_storages_system PRIVATE
|
||||||
dbms
|
dbms
|
||||||
common
|
common
|
||||||
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
|
|||||||
clickhouse_common_zookeeper
|
clickhouse_common_zookeeper
|
||||||
clickhouse_parsers
|
clickhouse_parsers
|
||||||
Poco::JSON
|
Poco::JSON
|
||||||
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)
|
||||||
|
@ -3,14 +3,21 @@
|
|||||||
#include <Storages/System/attachSystemTablesImpl.h>
|
#include <Storages/System/attachSystemTablesImpl.h>
|
||||||
#include <Parsers/ParserCreateQuery.h>
|
#include <Parsers/ParserCreateQuery.h>
|
||||||
#include <Parsers/parseQuery.h>
|
#include <Parsers/parseQuery.h>
|
||||||
#include <Common/getResource.h>
|
#include <incbin.h>
|
||||||
|
|
||||||
|
/// Embedded SQL definitions
|
||||||
|
INCBIN(resource_schemata_sql, "schemata.sql");
|
||||||
|
INCBIN(resource_tables_sql, "tables.sql");
|
||||||
|
INCBIN(resource_views_sql, "views.sql");
|
||||||
|
INCBIN(resource_columns_sql, "columns.sql");
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
|
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
|
||||||
|
|
||||||
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
|
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
|
|||||||
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
|
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
|
||||||
|
|
||||||
String metadata_resource_name = view_name + ".sql";
|
String metadata_resource_name = view_name + ".sql";
|
||||||
auto attach_query = getResource(metadata_resource_name);
|
if (query.empty())
|
||||||
if (attach_query.empty())
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
ParserCreateQuery parser;
|
ParserCreateQuery parser;
|
||||||
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
|
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
|
||||||
"Attach query from embedded resource " + metadata_resource_name,
|
"Attach query from embedded resource " + metadata_resource_name,
|
||||||
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||||
|
|
||||||
@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
|
|||||||
|
|
||||||
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
|
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
|
||||||
{
|
{
|
||||||
createInformationSchemaView(context, information_schema_database, "schemata");
|
createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
|
||||||
createInformationSchemaView(context, information_schema_database, "tables");
|
createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
|
||||||
createInformationSchemaView(context, information_schema_database, "views");
|
createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
|
||||||
createInformationSchemaView(context, information_schema_database, "columns");
|
createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -238,10 +238,6 @@ defaultValueOfArgumentType
|
|||||||
defaultValueOfTypeName
|
defaultValueOfTypeName
|
||||||
degrees
|
degrees
|
||||||
demangle
|
demangle
|
||||||
detectCharset
|
|
||||||
detectLanguageUnknown
|
|
||||||
detectProgrammingLanguage
|
|
||||||
detectTonality
|
|
||||||
divide
|
divide
|
||||||
dotProduct
|
dotProduct
|
||||||
dumpColumnStructure
|
dumpColumnStructure
|
||||||
|
@ -15,5 +15,7 @@ AND name NOT IN (
|
|||||||
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
|
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
|
||||||
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
|
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
|
||||||
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
|
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
|
||||||
'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
|
'lemmatize', 'tokenize', 'stem', 'synonyms',
|
||||||
|
'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
|
||||||
|
-- these functions are not enabled in fast test
|
||||||
) ORDER BY name;
|
) ORDER BY name;
|
||||||
|
Loading…
Reference in New Issue
Block a user