From 00d6f2ee08a3e442363a078b322adab7b6988f91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 04:56:47 +0200 Subject: [PATCH] Use incbin for resources, part 1 --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 +- contrib/incbin | 1 + contrib/incbin-cmake/CMakeLists.txt | 4 + contrib/nlp-data-cmake/CMakeLists.txt | 15 -- programs/install/CMakeLists.txt | 3 + programs/install/Install.cpp | 13 +- programs/keeper/CMakeLists.txt | 15 -- programs/keeper/Keeper.cpp | 6 +- programs/server/CMakeLists.txt | 12 +- programs/server/Server.cpp | 11 +- programs/server/resources.cpp | 0 src/CMakeLists.txt | 6 +- src/Common/CMakeLists.txt | 2 +- src/Common/Config/ConfigProcessor.cpp | 33 ++-- src/Common/Config/ConfigProcessor.h | 3 + src/Common/FrequencyHolder.cpp | 181 ++++++++++++++++++ src/Common/FrequencyHolder.h | 170 +--------------- src/Daemon/BaseDaemon.cpp | 1 - src/Server/WebUIRequestHandler.cpp | 14 +- src/Storages/System/CMakeLists.txt | 12 +- .../System/attachInformationSchemaTables.cpp | 24 ++- 22 files changed, 268 insertions(+), 263 deletions(-) create mode 160000 contrib/incbin create mode 100644 contrib/incbin-cmake/CMakeLists.txt delete mode 100644 contrib/nlp-data-cmake/CMakeLists.txt create mode 100644 programs/server/resources.cpp create mode 100644 src/Common/FrequencyHolder.cpp diff --git a/.gitmodules b/.gitmodules index ba71a8ae3a7..30085fb8dd4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -340,3 +340,6 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0f68c0cbc7c..fdf6e60e58f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) +add_contrib (incbin-cmake incbin) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (lemmagen-c-cmake lemmagen-c) - add_contrib (nlp-data-cmake nlp-data) add_contrib (cld2-cmake cld2) endif() diff --git a/contrib/incbin b/contrib/incbin new file mode 160000 index 00000000000..6e576cae5ab --- /dev/null +++ b/contrib/incbin @@ -0,0 +1 @@ +Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e64ebc99c73 --- /dev/null +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") +add_library(_incbin INTERFACE) +target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) +add_library(ch_contrib::incbin ALIAS _incbin) diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c479..00000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt index c3f4d96d631..f3f562bab7c 100644 --- a/programs/install/CMakeLists.txt +++ b/programs/install/CMakeLists.txt @@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK ) clickhouse_program_add_library(install) + +# For incbin +target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server") diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d83e189f7ef..da2c95af62c 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -35,6 +32,12 @@ #include +#include + +/// Embedded configuration files used inside the install program +INCBIN(resource_config_xml, "config.xml"); +INCBIN(resource_users_xml, "users.xml"); + /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. * It also allows to avoid dependency on systemd, upstart, SysV init. @@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(main_config_file)) { - std::string_view main_config_content = getResource("config.xml"); + std::string_view main_config_content(reinterpret_cast(gresource_config_xmlData), gresource_config_xmlSize); if (main_config_content.empty()) { fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); @@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(users_config_file)) { - std::string_view users_config_content = getResource("users.xml"); + std::string_view users_config_content(reinterpret_cast(gresource_users_xmlData), gresource_users_xmlSize); if (users_config_content.empty()) { fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 940e6848597..317e35959aa 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,16 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -if (OS_LINUX) - set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - # for some reason INTERFACE linkage doesn't work for standalone binary - set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -endif () - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) - set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) @@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) -add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources @@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER) ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) - add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) if (SPLIT_DEBUG_SYMBOLS) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6034d63a016..a38467c3369 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -457,8 +457,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); /// ConfigReloader have to strict parameters which are redundant in our case auto main_config_reloader = std::make_unique( diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 855973d10e1..e008e65acf6 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,12 +1,8 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(CLICKHOUSE_SERVER_SOURCES MetricsTransmitter.cpp Server.cpp ) -set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - set (CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_aggregate_functions @@ -33,10 +29,4 @@ endif() clickhouse_program_add(server) -install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) - -clickhouse_embed_binaries( - TARGET clickhouse_server_configs - RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js -) -add_dependencies(clickhouse-server-lib clickhouse_server_configs) +target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 33fdcc9c1a8..229a169dc1e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -128,6 +128,10 @@ # include #endif +#include +/// A minimal file used when the server is run without installation +INCBIN(resource_embedded_xml, "embedded.xml"); + namespace CurrentMetrics { extern const Metric Revision; @@ -393,6 +397,7 @@ int Server::run() void Server::initialize(Poco::Util::Application & self) { + ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast(gresource_embedded_xmlData), gresource_embedded_xmlSize)); BaseDaemon::initialize(self); logger().information("starting up"); @@ -1105,8 +1110,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); auto main_config_reloader = std::make_unique( config_path, diff --git a/programs/server/resources.cpp b/programs/server/resources.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f870993f080..fda8bafde59 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() -target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin) add_subdirectory(Access/Common) add_subdirectory(Common/ZooKeeper) @@ -296,7 +296,7 @@ macro (dbms_target_include_directories) endforeach () endmacro () -dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") +dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") if (TARGET ch_contrib::llvm) @@ -561,7 +561,7 @@ if (ENABLE_NLP) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) - dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) + target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data) endif() if (TARGET ch_contrib::ulid) diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index e527b3dec43..b83c8431f0a 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES) endif() if (ENABLE_MYSQL) - add_subdirectory (mysqlxx) + add_subdirectory(mysqlxx) endif () diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 5bbc8eae0de..c3a8f69cf3f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -83,6 +83,13 @@ ConfigProcessor::~ConfigProcessor() Poco::Logger::destroy("ConfigProcessor"); } +static std::unordered_map embedded_configs; + +void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content) +{ + embedded_configs[name] = content; +} + /// Vector containing the name of the element and a sorted list of attribute names and values /// (except "remove" and "replace" attributes). @@ -281,15 +288,15 @@ void ConfigProcessor::doIncludesRecursive( { std::string value = node->nodeValue(); - bool replace_occured = false; + bool replace_occurred = false; size_t pos; while ((pos = value.find(substitution.first)) != std::string::npos) { value.replace(pos, substitution.first.length(), substitution.second); - replace_occured = true; + replace_occurred = true; } - if (replace_occured) + if (replace_occurred) node->setNodeValue(value); } } @@ -528,26 +535,14 @@ XMLDocumentPtr ConfigProcessor::processConfig( } else { - /// These embedded files added during build with some cmake magic. - /// Look at the end of programs/server/CMakeLists.txt. - std::string embedded_name; - if (path == "config.xml") - embedded_name = "embedded.xml"; - - if (path == "keeper_config.xml") - embedded_name = "keeper_embedded.xml"; - - /// When we can use config embedded in binary. - if (!embedded_name.empty()) + /// When we can use a config embedded in the binary. + if (auto it = embedded_configs.find(path); it != embedded_configs.end()) { - auto resource = getResource(embedded_name); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path); LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path); - config = dom_parser.parseMemory(resource.data(), resource.size()); + config = dom_parser.parseMemory(it->second.data(), it->second.size()); } else - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path); } std::vector contributing_files; diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 0ca3e46db88..eefe65ef06c 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -65,6 +65,9 @@ public: zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr, const zkutil::EventPtr & zk_changed_event = nullptr); + /// These configurations will be used if there is no configuration file. + static void registerEmbeddedConfig(std::string name, std::string_view content); + /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration. /// The resulting XML document is saved into a file with the name diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp new file mode 100644 index 00000000000..3b755cacacb --- /dev/null +++ b/src/Common/FrequencyHolder.cpp @@ -0,0 +1,181 @@ +#include + +#include + +/// Embedded SQL definitions +INCBIN(resource_charset_zst, "charset.zst"); +INCBIN(resource_tonality_ru_zst, "tonality_ru.zst"); +INCBIN(resource_programming_zst, "programming.zst"); + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int FILE_DOESNT_EXIST; +} + + +FrequencyHolder & FrequencyHolder::getInstance() +{ + static FrequencyHolder instance; + return instance; +} + +FrequencyHolder::FrequencyHolder() +{ + loadEmotionalDict(); + loadEncodingsFrequency(); + loadProgrammingFrequency(); +} + +void FrequencyHolder::loadEncodingsFrequency() +{ + Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); + + LOG_TRACE(log, "Loading embedded charset frequencies"); + + std::string_view resource(reinterpret_cast(gresource_charset_zstData), gresource_charset_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies"); + + String line; + UInt16 bigram; + Float64 frequency; + String charset_name; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new charset + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(charset_name, buf_line); + + /* In our dictionary we have lines with form: _ + * If we need to find language of data, we return + * If we need to find charset of data, we return . + */ + size_t sep = charset_name.find('_'); + + Encoding enc; + enc.lang = charset_name.substr(0, sep); + enc.name = charset_name.substr(sep + 1); + encodings_freq.push_back(std::move(enc)); + } + else + { + readIntText(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + encodings_freq.back().map[bigram] = frequency; + } + } + LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size()); +} + +void FrequencyHolder::loadEmotionalDict() +{ + Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); + LOG_TRACE(log, "Loading embedded emotional dictionary"); + + std::string_view resource(reinterpret_cast(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); + + String line; + String word; + Float64 tonality; + size_t count = 0; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + readStringUntilWhitespace(word, buf_line); + buf_line.ignore(); + readFloatText(tonality, buf_line); + + StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; + emotional_dict[ref] = tonality; + ++count; + } + LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count)); +} + +void FrequencyHolder::loadProgrammingFrequency() +{ + Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); + + LOG_TRACE(log, "Loading embedded programming languages frequencies loading"); + + std::string_view resource(reinterpret_cast(gresource_programming_zstData), gresource_programming_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies"); + + String line; + String bigram; + Float64 frequency; + String programming_language; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new language + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(programming_language, buf_line); + + Language lang; + lang.name = programming_language; + programming_freq.push_back(std::move(lang)); + } + else + { + readStringUntilWhitespace(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; + programming_freq.back().map[ref] = frequency; + } + } + LOG_TRACE(log, "Programming languages frequencies was added"); +} + +} diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 74098598441..270e4dbbd2a 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -20,11 +19,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; -} - /// FrequencyHolder class is responsible for storing and loading dictionaries /// needed for text classification functions: /// @@ -56,11 +50,7 @@ public: using EncodingMap = HashMap; using EncodingContainer = std::vector; - static FrequencyHolder & getInstance() - { - static FrequencyHolder instance; - return instance; - } + static FrequencyHolder & getInstance(); const Map & getEmotionalDict() const { @@ -78,161 +68,11 @@ public: } private: + FrequencyHolder(); - FrequencyHolder() - { - loadEmotionalDict(); - loadEncodingsFrequency(); - loadProgrammingFrequency(); - } - - void loadEncodingsFrequency() - { - Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); - - LOG_TRACE(log, "Loading embedded charset frequencies"); - - auto resource = getResource("charset.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies"); - - String line; - UInt16 bigram; - Float64 frequency; - String charset_name; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new charset - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(charset_name, buf_line); - - /* In our dictionary we have lines with form: _ - * If we need to find language of data, we return - * If we need to find charset of data, we return . - */ - size_t sep = charset_name.find('_'); - - Encoding enc; - enc.lang = charset_name.substr(0, sep); - enc.name = charset_name.substr(sep + 1); - encodings_freq.push_back(std::move(enc)); - } - else - { - readIntText(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - encodings_freq.back().map[bigram] = frequency; - } - } - LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size()); - } - - void loadEmotionalDict() - { - Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); - LOG_TRACE(log, "Loading embedded emotional dictionary"); - - auto resource = getResource("tonality_ru.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); - - String line; - String word; - Float64 tonality; - size_t count = 0; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - readStringUntilWhitespace(word, buf_line); - buf_line.ignore(); - readFloatText(tonality, buf_line); - - StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; - emotional_dict[ref] = tonality; - ++count; - } - LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count)); - } - - void loadProgrammingFrequency() - { - Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); - - LOG_TRACE(log, "Loading embedded programming languages frequencies loading"); - - auto resource = getResource("programming.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies"); - - String line; - String bigram; - Float64 frequency; - String programming_language; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new language - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(programming_language, buf_line); - - Language lang; - lang.name = programming_language; - programming_freq.push_back(std::move(lang)); - } - else - { - readStringUntilWhitespace(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; - programming_freq.back().map[ref] = frequency; - } - } - LOG_TRACE(log, "Programming languages frequencies was added"); - } + void loadEncodingsFrequency(); + void loadEmotionalDict(); + void loadProgrammingFrequency(); Arena string_pool; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 3852ec5ada5..f61ca054b2a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -38,7 +38,6 @@ #include #include -#include #include #include #include diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 3997e0f19b6..cb9e8935d8c 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,10 +6,16 @@ #include #include -#include #include +#include + +/// Embedded HTML pages +INCBIN(resource_play_html, "play.html"); +INCBIN(resource_dashboard_html, "dashboard.html"); +INCBIN(resource_uplot_js, "js/uplot.js"); + namespace DB { @@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("play.html"); + *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - std::string html(getResource("dashboard.html")); + std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); /// Replace a link to external JavaScript file to embedded file. /// This allows to open the HTML without running a server and to host it on server. @@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("js/uplot.js"); + *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 1d2a3de5101..6b7d1739e33 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -43,18 +43,9 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -clickhouse_embed_binaries( - TARGET information_schema_metadata - RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" - RESOURCES schemata.sql tables.sql views.sql columns.sql -) - list (SORT storages_system_sources) # Reproducible build add_library(clickhouse_storages_system ${storages_system_sources}) -add_dependencies(clickhouse_storages_system information_schema_metadata) - target_link_libraries(clickhouse_storages_system PRIVATE dbms common @@ -62,5 +53,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE clickhouse_common_zookeeper clickhouse_parsers Poco::JSON - INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) + +target_include_directories(clickhouse_storages_system PRIVATE InformationSchema) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..bfc5c8c64e2 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,14 +3,21 @@ #include #include #include -#include +#include + +/// Embedded SQL definitions +INCBIN(resource_schemata_sql, "schemata.sql"); +INCBIN(resource_tables_sql, "tables.sql"); +INCBIN(resource_views_sql, "views.sql"); +INCBIN(resource_columns_sql, "columns.sql"); + namespace DB { /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt -static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) +static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query) { try { @@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; - auto attach_query = getResource(metadata_resource_name); - if (attach_query.empty()) + if (query.empty()) return; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) { - createInformationSchemaView(context, information_schema_database, "schemata"); - createInformationSchemaView(context, information_schema_database, "tables"); - createInformationSchemaView(context, information_schema_database, "views"); - createInformationSchemaView(context, information_schema_database, "columns"); + createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast(gresource_schemata_sqlData), gresource_schemata_sqlSize)); + createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); + createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); + createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); } }