Use incbin for resources, part 1

This commit is contained in:
Alexey Milovidov 2023-07-23 04:56:47 +02:00
parent 2467eaf32d
commit 00d6f2ee08
22 changed files with 268 additions and 263 deletions

3
.gitmodules vendored
View File

@ -340,3 +340,6 @@
[submodule "contrib/c-ares"] [submodule "contrib/c-ares"]
path = contrib/c-ares path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git

View File

@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft) add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float) add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP) if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c) add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2) add_contrib (cld2-cmake cld2)
endif() endif()

1
contrib/incbin vendored Submodule

@ -0,0 +1 @@
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf

View File

@ -0,0 +1,4 @@
# Expose the vendored incbin submodule as an INTERFACE target: nothing is
# compiled here, consumers only inherit the include path for <incbin.h>.
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")

add_library(_incbin INTERFACE)

# SYSTEM makes the directory a system include path for consumers.
# The expansion is quoted so that a source tree located under a path with
# spaces or semicolons is still passed as one argument.
target_include_directories(_incbin SYSTEM INTERFACE "${LIBRARY_DIR}")

# Namespaced alias that other targets link against.
add_library(ch_contrib::incbin ALIAS _incbin)

View File

@ -1,15 +0,0 @@
# Build description for the NLP dictionary resources shipped in contrib/nlp-data.
# Provides the clickhouse_embed_binaries() helper used below.
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
# Interface-only target: it carries link requirements, it has no sources of its own.
add_library (_nlp_data INTERFACE)
# Declares the nlp_dictionaries target from the three .zst files found in RESOURCE_DIR.
# NOTE(review): the exact artifact produced is defined in cmake/embed_binary.cmake — confirm there.
clickhouse_embed_binaries(
TARGET nlp_dictionaries
RESOURCE_DIR "${LIBRARY_DIR}"
RESOURCES charset.zst tonality_ru.zst programming.zst
)
# Only orders the build: nlp_dictionaries must exist before _nlp_data is consumed.
add_dependencies(_nlp_data nlp_dictionaries)
# Consumers link the resource archive wrapped in the WHOLE_ARCHIVE / NO_WHOLE_ARCHIVE flags,
# passed as a single quoted item so the three parts stay together and in this order.
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
# Namespaced alias that other targets link against.
add_library(ch_contrib::nlp_data ALIAS _nlp_data)

View File

@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK
) )
clickhouse_program_add_library(install) clickhouse_program_add_library(install)
# For incbin
target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server")

View File

@ -20,10 +20,7 @@
#include <Common/formatReadable.h> #include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h> #include <Common/OpenSSLHelpers.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h> #include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h> #include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
@ -35,6 +32,12 @@
#include <Poco/Util/XMLConfiguration.h> #include <Poco/Util/XMLConfiguration.h>
#include <incbin.h>
/// Embedded configuration files used inside the install program
INCBIN(resource_config_xml, "config.xml");
INCBIN(resource_users_xml, "users.xml");
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init. * It also allows to avoid dependency on systemd, upstart, SysV init.
@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file)) if (!fs::exists(main_config_file))
{ {
std::string_view main_config_content = getResource("config.xml"); std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty()) if (main_config_content.empty())
{ {
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file)) if (!fs::exists(users_config_file))
{ {
std::string_view users_config_content = getResource("users.xml"); std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty()) if (users_config_content.empty())
{ {
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());

View File

@ -1,16 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
if (OS_LINUX)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
# for some reason INTERFACE linkage doesn't work for standalone binary
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
clickhouse_embed_binaries(
TARGET clickhouse_keeper_configs
RESOURCES keeper_config.xml keeper_embedded.xml
)
set(CLICKHOUSE_KEEPER_SOURCES set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp Keeper.cpp
) )
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper) clickhouse_program_add(keeper)
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER) if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources # Straight list of all required sources
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
) )
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS) if (SPLIT_DEBUG_SYMBOLS)

View File

@ -457,8 +457,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path}; std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path); if (!cert_path.empty())
if (!key_path.empty()) extra_paths.emplace_back(key_path); extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
/// ConfigReloader have to strict parameters which are redundant in our case /// ConfigReloader have to strict parameters which are redundant in our case
auto main_config_reloader = std::make_unique<ConfigReloader>( auto main_config_reloader = std::make_unique<ConfigReloader>(

View File

@ -1,12 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_SERVER_SOURCES set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp MetricsTransmitter.cpp
Server.cpp Server.cpp
) )
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
set (CLICKHOUSE_SERVER_LINK set (CLICKHOUSE_SERVER_LINK
PRIVATE PRIVATE
clickhouse_aggregate_functions clickhouse_aggregate_functions
@ -33,10 +29,4 @@ endif()
clickhouse_program_add(server) clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)

View File

@ -128,6 +128,10 @@
# include <azure/storage/common/internal/xml_wrapper.hpp> # include <azure/storage/common/internal/xml_wrapper.hpp>
#endif #endif
#include <incbin.h>
/// A minimal file used when the server is run without installation
INCBIN(resource_embedded_xml, "embedded.xml");
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric Revision; extern const Metric Revision;
@ -393,6 +397,7 @@ int Server::run()
void Server::initialize(Poco::Util::Application & self) void Server::initialize(Poco::Util::Application & self)
{ {
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
BaseDaemon::initialize(self); BaseDaemon::initialize(self);
logger().information("starting up"); logger().information("starting up");
@ -1105,8 +1110,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path}; std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path); if (!cert_path.empty())
if (!key_path.empty()) extra_paths.emplace_back(key_path); extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
auto main_config_reloader = std::make_unique<ConfigReloader>( auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path, config_path,

View File

View File

@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif() endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common) add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper) add_subdirectory(Common/ZooKeeper)
@ -296,7 +296,7 @@ macro (dbms_target_include_directories)
endforeach () endforeach ()
endmacro () endmacro ()
dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server")
target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src")
if (TARGET ch_contrib::llvm) if (TARGET ch_contrib::llvm)
@ -561,7 +561,7 @@ if (ENABLE_NLP)
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data)
endif() endif()
if (TARGET ch_contrib::ulid) if (TARGET ch_contrib::ulid)

View File

@ -83,6 +83,13 @@ ConfigProcessor::~ConfigProcessor()
Poco::Logger::destroy("ConfigProcessor"); Poco::Logger::destroy("ConfigProcessor");
} }
/// Configuration documents compiled into the binary, keyed by the name of the file they stand in for.
/// Only views are stored, so the registered content must outlive every later lookup.
static std::unordered_map<std::string, std::string_view> embedded_configs;

void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
{
    /// Registering the same name again replaces the previously stored view.
    embedded_configs.insert_or_assign(name, content);
}
/// Vector containing the name of the element and a sorted list of attribute names and values /// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes). /// (except "remove" and "replace" attributes).
@ -281,15 +288,15 @@ void ConfigProcessor::doIncludesRecursive(
{ {
std::string value = node->nodeValue(); std::string value = node->nodeValue();
bool replace_occured = false; bool replace_occurred = false;
size_t pos; size_t pos;
while ((pos = value.find(substitution.first)) != std::string::npos) while ((pos = value.find(substitution.first)) != std::string::npos)
{ {
value.replace(pos, substitution.first.length(), substitution.second); value.replace(pos, substitution.first.length(), substitution.second);
replace_occured = true; replace_occurred = true;
} }
if (replace_occured) if (replace_occurred)
node->setNodeValue(value); node->setNodeValue(value);
} }
} }
@ -528,26 +535,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
} }
else else
{ {
/// These embedded files added during build with some cmake magic. /// When we can use a config embedded in the binary.
/// Look at the end of programs/server/CMakeLists.txt. if (auto it = embedded_configs.find(path); it != embedded_configs.end())
std::string embedded_name;
if (path == "config.xml")
embedded_name = "embedded.xml";
if (path == "keeper_config.xml")
embedded_name = "keeper_embedded.xml";
/// When we can use config embedded in binary.
if (!embedded_name.empty())
{ {
auto resource = getResource(embedded_name);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path); LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
config = dom_parser.parseMemory(resource.data(), resource.size()); config = dom_parser.parseMemory(it->second.data(), it->second.size());
} }
else else
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path); throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
} }
std::vector<std::string> contributing_files; std::vector<std::string> contributing_files;

View File

@ -65,6 +65,9 @@ public:
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr, zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
const zkutil::EventPtr & zk_changed_event = nullptr); const zkutil::EventPtr & zk_changed_event = nullptr);
/// These configurations will be used if there is no configuration file.
static void registerEmbeddedConfig(std::string name, std::string_view content);
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration. /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
/// The resulting XML document is saved into a file with the name /// The resulting XML document is saved into a file with the name

View File

@ -0,0 +1,181 @@
#include <Common/FrequencyHolder.h>
#include <incbin.h>
/// Embedded zstd-compressed dictionaries used by the text classification functions
INCBIN(resource_charset_zst, "charset.zst");
INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
INCBIN(resource_programming_zst, "programming.zst");
namespace DB
{

namespace ErrorCodes
{
    extern const int FILE_DOESNT_EXIST;
    extern const int INCORRECT_DATA;
}

/// Returns the single process-wide holder.
/// The dictionaries are loaded by the constructor when the function-local static is first initialised.
FrequencyHolder & FrequencyHolder::getInstance()
{
    static FrequencyHolder instance;
    return instance;
}

/// Loads all three embedded dictionaries; any failure propagates as an exception.
FrequencyHolder::FrequencyHolder()
{
    loadEmotionalDict();
    loadEncodingsFrequency();
    loadProgrammingFrequency();
}

/// Parses the embedded charset dictionary into `encodings_freq`.
///
/// A line starting with "// " opens a new entry named "<Language>_<Charset>";
/// every other non-empty line is "<bigram> <frequency>" and belongs to the entry opened last.
///
/// Throws FILE_DOESNT_EXIST if the embedded resource is empty and
/// INCORRECT_DATA if a frequency line appears before the first header.
void FrequencyHolder::loadEncodingsFrequency()
{
    Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");

    LOG_TRACE(log, "Loading embedded charset frequencies");

    std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");

    String line;
    UInt16 bigram;
    Float64 frequency;
    String charset_name;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        /// Read one record, then skip the delimiter that follows it (presumably the line terminator).
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        // Start loading a new charset
        if (line.starts_with("// "))
        {
            // Skip "// "
            buf_line.ignore(3);
            readString(charset_name, buf_line);

            /* In our dictionary we have lines with form: <Language>_<Charset>
             * If we need to find language of data, we return <Language>
             * If we need to find charset of data, we return <Charset>.
             *
             * If there is no '_', find() returns npos and both parts become the whole name
             * (npos + 1 wraps around to 0).
             */
            size_t sep = charset_name.find('_');

            Encoding enc;
            enc.lang = charset_name.substr(0, sep);
            enc.name = charset_name.substr(sep + 1);
            encodings_freq.push_back(std::move(enc));
        }
        else
        {
            /// back() on an empty vector is undefined behaviour, so reject data without a leading header.
            if (encodings_freq.empty())
                throw Exception(ErrorCodes::INCORRECT_DATA, "Embedded charset frequencies are malformed: a frequency line precedes the first charset header");

            readIntText(bigram, buf_line);
            buf_line.ignore();
            readFloatText(frequency, buf_line);

            encodings_freq.back().map[bigram] = frequency;
        }
    }
    LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}

/// Parses the embedded tonality dictionary into `emotional_dict`.
///
/// Every non-empty line is "<word> <tonality>". The word bytes are copied into `string_pool`
/// and the map key refers to that copy.
///
/// Throws FILE_DOESNT_EXIST if the embedded resource is empty.
void FrequencyHolder::loadEmotionalDict()
{
    Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
    LOG_TRACE(log, "Loading embedded emotional dictionary");

    std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");

    String line;
    String word;
    Float64 tonality;
    size_t count = 0;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        /// Read one record, then skip the delimiter that follows it (presumably the line terminator).
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        readStringUntilWhitespace(word, buf_line);
        buf_line.ignore();
        readFloatText(tonality, buf_line);

        /// The key must not point into `word`, which is overwritten on the next iteration.
        StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
        emotional_dict[ref] = tonality;
        ++count;
    }
    /// The counter is formatted directly; no intermediate std::string is needed.
    LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", count);
}

/// Parses the embedded programming-language dictionary into `programming_freq`.
///
/// A line starting with "// " opens a new language entry; every other non-empty line is
/// "<bigram> <frequency>" and belongs to the language opened last. Bigram bytes are copied
/// into `string_pool`.
///
/// Throws FILE_DOESNT_EXIST if the embedded resource is empty and
/// INCORRECT_DATA if a frequency line appears before the first header.
void FrequencyHolder::loadProgrammingFrequency()
{
    Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");

    LOG_TRACE(log, "Loading embedded programming languages frequencies");

    std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");

    String line;
    String bigram;
    Float64 frequency;
    String programming_language;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        /// Read one record, then skip the delimiter that follows it (presumably the line terminator).
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        // Start loading a new language
        if (line.starts_with("// "))
        {
            // Skip "// "
            buf_line.ignore(3);
            readString(programming_language, buf_line);

            Language lang;
            lang.name = programming_language;
            programming_freq.push_back(std::move(lang));
        }
        else
        {
            /// back() on an empty vector is undefined behaviour, so reject data without a leading header.
            if (programming_freq.empty())
                throw Exception(ErrorCodes::INCORRECT_DATA, "Embedded programming languages frequencies are malformed: a frequency line precedes the first language header");

            readStringUntilWhitespace(bigram, buf_line);
            buf_line.ignore();
            readFloatText(frequency, buf_line);

            /// The key must not point into `bigram`, which is overwritten on the next iteration.
            StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
            programming_freq.back().map[ref] = frequency;
        }
    }
    LOG_TRACE(log, "Programming languages frequencies was added");
}

}

View File

@ -7,7 +7,6 @@
#include <unordered_map> #include <unordered_map>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/getResource.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
@ -20,11 +19,6 @@
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
/// FrequencyHolder class is responsible for storing and loading dictionaries /// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions: /// needed for text classification functions:
/// ///
@ -56,11 +50,7 @@ public:
using EncodingMap = HashMap<UInt16, Float64>; using EncodingMap = HashMap<UInt16, Float64>;
using EncodingContainer = std::vector<Encoding>; using EncodingContainer = std::vector<Encoding>;
static FrequencyHolder & getInstance() static FrequencyHolder & getInstance();
{
static FrequencyHolder instance;
return instance;
}
const Map & getEmotionalDict() const const Map & getEmotionalDict() const
{ {
@ -78,161 +68,11 @@ public:
} }
private: private:
FrequencyHolder();
FrequencyHolder() void loadEncodingsFrequency();
{ void loadEmotionalDict();
loadEmotionalDict(); void loadProgrammingFrequency();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
auto resource = getResource("charset.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
auto resource = getResource("tonality_ru.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
auto resource = getResource("programming.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
Arena string_pool; Arena string_pool;

View File

@ -38,7 +38,6 @@
#include <base/coverage.h> #include <base/coverage.h>
#include <base/sleep.h> #include <base/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h> #include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/ReadBufferFromFileDescriptor.h> #include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>

View File

@ -6,10 +6,16 @@
#include <Poco/Util/LayeredConfiguration.h> #include <Poco/Util/LayeredConfiguration.h>
#include <IO/HTTPCommon.h> #include <IO/HTTPCommon.h>
#include <Common/getResource.h>
#include <re2/re2.h> #include <re2/re2.h>
#include <incbin.h>
/// Embedded HTML pages
INCBIN(resource_play_html, "play.html");
INCBIN(resource_dashboard_html, "dashboard.html");
INCBIN(resource_uplot_js, "js/uplot.js");
namespace DB namespace DB
{ {
@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
if (request.getURI().starts_with("/play")) if (request.getURI().starts_with("/play"))
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("play.html"); *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
} }
else if (request.getURI().starts_with("/dashboard")) else if (request.getURI().starts_with("/dashboard"))
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
std::string html(getResource("dashboard.html")); std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
/// Replace a link to external JavaScript file to embedded file. /// Replace a link to external JavaScript file to embedded file.
/// This allows to open the HTML without running a server and to host it on server. /// This allows to open the HTML without running a server and to host it on server.
@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
else if (request.getURI() == "/js/uplot.js") else if (request.getURI() == "/js/uplot.js")
{ {
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("js/uplot.js"); *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
} }
else else
{ {

View File

@ -43,18 +43,9 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
# Overlength strings # Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
TARGET information_schema_metadata
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
RESOURCES schemata.sql tables.sql views.sql columns.sql
)
list (SORT storages_system_sources) # Reproducible build list (SORT storages_system_sources) # Reproducible build
add_library(clickhouse_storages_system ${storages_system_sources}) add_library(clickhouse_storages_system ${storages_system_sources})
add_dependencies(clickhouse_storages_system information_schema_metadata)
target_link_libraries(clickhouse_storages_system PRIVATE target_link_libraries(clickhouse_storages_system PRIVATE
dbms dbms
common common
@ -62,5 +53,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
clickhouse_common_zookeeper clickhouse_common_zookeeper
clickhouse_parsers clickhouse_parsers
Poco::JSON Poco::JSON
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
) )
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)

View File

@ -3,14 +3,21 @@
#include <Storages/System/attachSystemTablesImpl.h> #include <Storages/System/attachSystemTablesImpl.h>
#include <Parsers/ParserCreateQuery.h> #include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h> #include <Parsers/parseQuery.h>
#include <Common/getResource.h> #include <incbin.h>
/// Embedded SQL definitions
INCBIN(resource_schemata_sql, "schemata.sql");
INCBIN(resource_tables_sql, "tables.sql");
INCBIN(resource_views_sql, "views.sql");
INCBIN(resource_columns_sql, "columns.sql");
namespace DB namespace DB
{ {
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
{ {
try try
{ {
@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
String metadata_resource_name = view_name + ".sql"; String metadata_resource_name = view_name + ".sql";
auto attach_query = getResource(metadata_resource_name); if (query.empty())
if (attach_query.empty())
return; return;
ParserCreateQuery parser; ParserCreateQuery parser;
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
"Attach query from embedded resource " + metadata_resource_name, "Attach query from embedded resource " + metadata_resource_name,
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
{ {
createInformationSchemaView(context, information_schema_database, "schemata"); createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
createInformationSchemaView(context, information_schema_database, "tables"); createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
createInformationSchemaView(context, information_schema_database, "views"); createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
createInformationSchemaView(context, information_schema_database, "columns"); createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
} }
} }