Merge branch 'master' into remove-wrong-code

This commit is contained in:
Alexey Milovidov 2023-07-24 19:34:01 +02:00
commit b9375782bd
65 changed files with 618 additions and 1064 deletions

.gitmodules vendored

@ -340,3 +340,6 @@
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git


@ -1,58 +0,0 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
# 3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
macro(clickhouse_embed_binaries)
set(one_value_args TARGET RESOURCE_DIR)
set(resources RESOURCES)
cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
if (NOT DEFINED EMBED_TARGET)
message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
endif()
if (NOT DEFINED EMBED_RESOURCE_DIR)
set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(LENGTH EMBED_RESOURCES N_RESOURCES)
if (N_RESOURCES LESS 1)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
endforeach()
endmacro()
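For reference, a minimal sketch (assumed names, not taken from this commit) of how the `_binary_<name>_start`/`_binary_<name>_end` symbols emitted by this removed macro used to be consumed from C++; the resource `config.xml`, normalized to `config_xml`, is only an illustrative example:

```cpp
// Illustrative only: consuming the symbols produced by the removed clickhouse_embed_binaries
// macro. Symbol names follow the `_binary_<name>_start` / `_binary_<name>_end` scheme
// documented above; "config_xml" is a hypothetical normalized resource name.
#include <cstddef>
#include <string_view>

extern "C" const char _binary_config_xml_start[]; /// start of the embedded blob
extern "C" const char _binary_config_xml_end[];   /// one-past-the-end of the embedded blob

std::string_view getEmbeddedConfigXml()
{
    return {_binary_config_xml_start,
            static_cast<size_t>(_binary_config_xml_end - _binary_config_xml_start)};
}
```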


@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()


@ -1,4 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
set (SRCS
@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif ()
# Related to time_zones table:
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
# as the library that's built using embedded tzdata is also specific to OS_LINUX
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# TimeZones.generated.cpp is autogenerated each time during a build
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# remove existing copies so that they are generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(REMOVE ${TIMEZONES_FILE})
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
set(TIMEZONE_RESOURCE_FILES)
# each file in that dir (except *.tab files and localtime) stores the info about a timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
TARGET tzdata
RESOURCE_DIR "${TZDIR}"
RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(_cctz tzdata)
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} " return {};\n")
file(APPEND ${TIMEZONES_FILE} "}\n")
add_library (tzdata ${TIMEZONES_FILE})
target_link_libraries(tzdata ch_contrib::incbin)
target_link_libraries(_cctz tzdata)
add_library(ch_contrib::cctz ALIAS _cctz)
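For illustration, the `TimeZones.generated.cpp` produced by the `file(APPEND ...)` calls above has roughly the following shape (a sketch trimmed to two example timezones; the real file lists every entry found under `${TZDIR}`, with absolute paths expanded at configure time):

```cpp
// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt
// (sketch; "/path/to" stands for the expanded absolute ${TZDIR} prefix)
#include <incbin.h>

INCBIN(resource_timezone1, "/path/to/contrib/cctz/testdata/zoneinfo/Africa/Abidjan");
INCBIN(resource_timezone2, "/path/to/contrib/cctz/testdata/zoneinfo/Africa/Accra");

const char * auto_time_zones[] {
    "Africa/Abidjan",
    "Africa/Accra",
    nullptr
};

#include <string_view>

std::string_view getTimeZone(const char * name)
{
    if (std::string_view("Africa/Abidjan") == name) return { reinterpret_cast<const char *>(gresource_timezone1Data), gresource_timezone1Size };
    if (std::string_view("Africa/Accra") == name) return { reinterpret_cast<const char *>(gresource_timezone2Data), gresource_timezone2Size };
    return {};
}
```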

contrib/incbin vendored Submodule

@ -0,0 +1 @@
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf


@ -0,0 +1,8 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
add_library(_incbin INTERFACE)
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
add_library(ch_contrib::incbin ALIAS _incbin)
# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
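A minimal usage sketch of the header this target exposes (assuming default incbin settings, under which `INCBIN(name, path)` defines `g<name>Data`/`g<name>End`/`g<name>Size`, matching the `gresource_*Data`/`gresource_*Size` symbols relied on elsewhere in this commit; the file `example.bin` is hypothetical):

```cpp
// Minimal incbin usage sketch; "example.bin" is a hypothetical file.
#include <string_view>
#include <incbin.h>

INCBIN(example, "example.bin"); /// defines gexampleData, gexampleEnd, gexampleSize

std::string_view getExample()
{
    return {reinterpret_cast<const char *>(gexampleData), gexampleSize};
}
```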


@ -1,15 +0,0 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
add_library (_nlp_data INTERFACE)
clickhouse_embed_binaries(
TARGET nlp_dictionaries
RESOURCE_DIR "${LIBRARY_DIR}"
RESOURCES charset.zst tonality_ru.zst programming.zst
)
add_dependencies(_nlp_data nlp_dictionaries)
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
add_library(ch_contrib::nlp_data ALIAS _nlp_data)


@ -147,6 +147,7 @@ function clone_submodules
contrib/simdjson
contrib/liburing
contrib/libfiu
contrib/incbin
)
git submodule sync


@ -945,44 +945,6 @@ Result:
└────────────┴───────┘
```
## toDecimalString
Converts a numeric value to String with the number of fractional digits in the output specified by the user.
**Syntax**
``` sql
toDecimalString(number, scale)
```
**Parameters**
- `number` — Value to be represented as String: [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md).
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
* Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (the maximum possible number of significant digits for Decimal).
* Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.
**Returned value**
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
The number is rounded up or down according to common arithmetic rules if the requested scale is smaller than the original number's scale.
**Example**
Query:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Result:
```response
┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64)
## reinterpretAsInt(8\|16\|32\|64)


@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
└────────────┴───────┘
```
## toDecimalString
Takes any numeric type as the first argument and returns a decimal string representation of the number, with the precision given by the second argument.
**Syntax**
``` sql
toDecimalString(number, scale)
```
**Parameters**
- `number` — A value of any numeric type: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md).
- `scale` — The requested number of digits after the decimal point, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
* The `scale` value for [Decimal](/docs/ru/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) types must not exceed 77 (the largest number of significant digits for these types).
* The `scale` value for the [Float](/docs/ru/sql-reference/data-types/float.md) type must not exceed 60.
**Returned value**
- A string ([String](/docs/en/sql-reference/data-types/string.md)) with the decimal representation of the input number and the requested number of fractional digits.
If necessary, the number is rounded according to standard arithmetic rules.
**Example**
Query:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Result:
```response
┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}


@ -20,10 +20,7 @@
#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
@ -35,6 +32,14 @@
#include <Poco/Util/XMLConfiguration.h>
#include <incbin.h>
#include "config.h"
/// Embedded configuration files used inside the install program
INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init.
@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file))
{
std::string_view main_config_content = getResource("config.xml");
std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty())
{
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file))
{
std::string_view users_config_content = getResource("users.xml");
std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty())
{
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());


@ -1,16 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
if (OS_LINUX)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
# for some reason INTERFACE linkage doesn't work for standalone binary
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
clickhouse_embed_binaries(
TARGET clickhouse_keeper_configs
RESOURCES keeper_config.xml keeper_embedded.xml
)
set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp
)
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper)
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS)


@ -457,8 +457,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
if (!key_path.empty()) extra_paths.emplace_back(key_path);
if (!cert_path.empty())
extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
/// ConfigReloader requires overly strict parameters which are redundant in our case
auto main_config_reloader = std::make_unique<ConfigReloader>(


@ -1,12 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp
Server.cpp
)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
set (CLICKHOUSE_SERVER_LINK
PRIVATE
clickhouse_aggregate_functions
@ -34,9 +30,3 @@ endif()
clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)


@ -128,6 +128,10 @@
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
#include <incbin.h>
/// A minimal file used when the server is run without installation
INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml");
namespace CurrentMetrics
{
extern const Metric Revision;
@ -393,6 +397,7 @@ int Server::run()
void Server::initialize(Poco::Util::Application & self)
{
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
BaseDaemon::initialize(self);
logger().information("starting up");
@ -1106,8 +1111,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
if (!key_path.empty()) extra_paths.emplace_back(key_path);
if (!cert_path.empty())
extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path,



@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper)
@ -561,7 +561,6 @@ if (ENABLE_NLP)
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
endif()
if (TARGET ch_contrib::ulid)


@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
endif()
if (ENABLE_MYSQL)
add_subdirectory (mysqlxx)
add_subdirectory(mysqlxx)
endif ()


@ -19,7 +19,6 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/getResource.h>
#include <Common/XMLUtils.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
Poco::Logger::destroy("ConfigProcessor");
}
static std::unordered_map<std::string, std::string_view> embedded_configs;
void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
{
embedded_configs[name] = content;
}
/// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes).
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
{
std::string value = node->nodeValue();
bool replace_occured = false;
bool replace_occurred = false;
size_t pos;
while ((pos = value.find(substitution.first)) != std::string::npos)
{
value.replace(pos, substitution.first.length(), substitution.second);
replace_occured = true;
replace_occurred = true;
}
if (replace_occured)
if (replace_occurred)
node->setNodeValue(value);
}
}
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
}
else
{
/// These embedded files added during build with some cmake magic.
/// Look at the end of programs/server/CMakeLists.txt.
std::string embedded_name;
if (path == "config.xml")
embedded_name = "embedded.xml";
if (path == "keeper_config.xml")
embedded_name = "keeper_embedded.xml";
/// When we can use config embedded in binary.
if (!embedded_name.empty())
/// When we can use a config embedded in the binary.
if (auto it = embedded_configs.find(path); it != embedded_configs.end())
{
auto resource = getResource(embedded_name);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
config = dom_parser.parseMemory(resource.data(), resource.size());
config = dom_parser.parseMemory(it->second.data(), it->second.size());
}
else
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
}
std::vector<std::string> contributing_files;


@ -65,6 +65,9 @@ public:
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
const zkutil::EventPtr & zk_changed_event = nullptr);
/// These configurations will be used if there is no configuration file.
static void registerEmbeddedConfig(std::string name, std::string_view content);
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
/// The resulting XML document is saved into a file with the name
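A self-contained sketch of the fallback pattern this declaration introduces, mirroring the `embedded_configs` map and lookup added to `ConfigProcessor.cpp` above (the helper name `getEmbeddedConfigOrEmpty` is illustrative, not part of the commit):

```cpp
// Sketch of the embedded-config fallback: a process registers embedded XML content
// up front; config loading falls back to it when the file is missing on disk.
#include <string>
#include <string_view>
#include <unordered_map>

static std::unordered_map<std::string, std::string_view> embedded_configs;

void registerEmbeddedConfig(std::string name, std::string_view content)
{
    embedded_configs[std::move(name)] = content;
}

/// Hypothetical helper showing the lookup performed by processConfig().
std::string_view getEmbeddedConfigOrEmpty(const std::string & path)
{
    if (auto it = embedded_configs.find(path); it != embedded_configs.end())
        return it->second; /// "There is no file '...', will use embedded config."
    return {};             /// caller throws FILE_DOESNT_EXIST
}
```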


@ -3,7 +3,6 @@
#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <cctz/zone_info_source.h>
#include <Common/getResource.h>
#include <Poco/Exception.h>
#include <algorithm>
@ -11,6 +10,11 @@
#include <chrono>
#include <cstring>
#include <memory>
#include <iostream>
/// Embedded timezones.
std::string_view getTimeZone(const char * name);
namespace
@ -249,9 +253,10 @@ namespace cctz_extension
const std::string & name,
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
{
std::string_view resource = getResource(name);
if (!resource.empty())
return std::make_unique<Source>(resource.data(), resource.size());
std::string_view tz_file = getTimeZone(name.data());
if (!tz_file.empty())
return std::make_unique<Source>(tz_file.data(), tz_file.size());
return fallback(name);
}


@ -0,0 +1,185 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <incbin.h>
/// Embedded frequency dictionaries used for text classification
INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst");
INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst");
INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst");
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
FrequencyHolder & FrequencyHolder::getInstance()
{
static FrequencyHolder instance;
return instance;
}
FrequencyHolder::FrequencyHolder()
{
loadEmotionalDict();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void FrequencyHolder::loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void FrequencyHolder::loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void FrequencyHolder::loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies");
std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
}
#endif


@ -1,5 +1,9 @@
#pragma once
#include "config.h"
#if USE_NLP
#include <base/StringRef.h>
#include <Common/logger_useful.h>
@ -7,7 +11,6 @@
#include <unordered_map>
#include <Common/Arena.h>
#include <Common/getResource.h>
#include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h>
@ -20,11 +23,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
/// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions:
///
@ -56,11 +54,7 @@ public:
using EncodingMap = HashMap<UInt16, Float64>;
using EncodingContainer = std::vector<Encoding>;
static FrequencyHolder & getInstance()
{
static FrequencyHolder instance;
return instance;
}
static FrequencyHolder & getInstance();
const Map & getEmotionalDict() const
{
@ -78,161 +72,11 @@ public:
}
private:
FrequencyHolder();
FrequencyHolder()
{
loadEmotionalDict();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
auto resource = getResource("charset.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
auto resource = getResource("tonality_ru.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
auto resource = getResource("programming.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
void loadEncodingsFrequency();
void loadEmotionalDict();
void loadProgrammingFrequency();
Arena string_pool;
@ -241,3 +85,5 @@ private:
EncodingContainer encodings_freq;
};
}
#endif
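A short usage sketch (the call site is hypothetical): dictionary loading now happens lazily in `FrequencyHolder.cpp` behind `USE_NLP`, and callers only touch the singleton accessors kept in this header:

```cpp
#include <Common/FrequencyHolder.h>

#if USE_NLP
/// Hypothetical call site: the first call to getInstance() decompresses and parses
/// the INCBIN-embedded dictionaries; later calls reuse the same singleton.
void exampleUse()
{
    const auto & emotional_dict = DB::FrequencyHolder::getInstance().getEmotionalDict();
    (void)emotional_dict;
}
#endif
```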


@ -45,6 +45,7 @@
M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \
M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \
M(OpenedFileCacheMisses, "Number of times a file has not been found in the opened file cache, so we had to open it again.") \
M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \
M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \
M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \
M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \


@ -87,50 +87,13 @@ namespace
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
{
const char * char_address = static_cast<const char *>(address);
if (name.starts_with("_binary_") || name.starts_with("binary_"))
{
if (name.ends_with("_start"))
{
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_start"));
auto & resource = resources[name];
if (!resource.base_address || resource.base_address == base_address)
{
resource.base_address = base_address;
resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
resource.object_name = object_name;
}
}
if (name.ends_with("_end"))
{
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_end"));
auto & resource = resources[name];
if (!resource.base_address || resource.base_address == base_address)
{
resource.base_address = base_address;
resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
resource.object_name = object_name;
}
}
}
}
/// Based on the code of musl-libc and the answer of Kanalpiroge on
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
/// but will work if we cannot find or parse ELF files.
void collectSymbolsFromProgramHeaders(
dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
/* Iterate over all headers of the current shared lib
* (first call is for the executable itself)
@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders(
/// We are not interested in empty symbols.
if (elf_sym[sym_index].st_size)
symbols.push_back(symbol);
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
break;
@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable(
const Elf & elf,
const Elf::Section & symbol_table,
const Elf::Section & string_table,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
/// Iterate symbol table.
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable(
if (symbol_table_entry->st_size)
symbols.push_back(symbol);
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
}
@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
const Elf & elf,
unsigned section_header_type,
const char * string_table_name,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
std::optional<Elf::Section> symbol_table;
std::optional<Elf::Section> string_table;
@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
return false;
}
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
return true;
}
@ -351,7 +307,6 @@ void collectSymbolsFromELF(
dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols,
std::vector<SymbolIndex::Object> & objects,
SymbolIndex::Resources & resources,
String & build_id)
{
String object_name;
@ -462,11 +417,11 @@ void collectSymbolsFromELF(
object.name = object_name;
objects.push_back(std::move(object));
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
/// Unneeded if they were parsed from "program headers" of loaded objects.
#if defined USE_MUSL
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
#endif
}
@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
{
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
collectSymbolsFromProgramHeaders(info, data.symbols);
collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
/* Continue iterations */
return 0;


@ -8,6 +8,7 @@
#include <Common/Elf.h>
#include <boost/noncopyable.hpp>
namespace DB
{
@ -45,44 +46,15 @@ public:
const std::vector<Symbol> & symbols() const { return data.symbols; }
const std::vector<Object> & objects() const { return data.objects; }
std::string_view getResource(String name) const
{
if (auto it = data.resources.find(name); it != data.resources.end())
return it->second.data();
return {};
}
/// The BuildID that is generated by compiler.
String getBuildID() const { return data.build_id; }
String getBuildIDHex() const;
struct ResourcesBlob
{
/// Symbol can be presented in multiple shared objects,
/// base_address will be used to compare only symbols from the same SO.
ElfW(Addr) base_address = 0;
/// Just a human name of the SO.
std::string_view object_name;
/// Data blob.
std::string_view start;
std::string_view end;
std::string_view data() const
{
assert(end.data() >= start.data());
return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
}
};
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
struct Data
{
std::vector<Symbol> symbols;
std::vector<Object> objects;
String build_id;
/// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
Resources resources;
};
private:
Data data;


@ -59,3 +59,7 @@
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT
/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
/// That's why we use absolute paths.
#cmakedefine SOURCE_DIR "@SOURCE_DIR@"


@ -1,52 +0,0 @@
#include "getResource.h"
#include <dlfcn.h>
#include <string>
#include <boost/algorithm/string/replace.hpp>
#include <Common/SymbolIndex.h>
std::string_view getResource(std::string_view name)
{
// Convert the resource file name into the form generated by `ld -r -b binary`.
std::string name_replaced(name);
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
boost::replace_all(name_replaced, "+", "_PLUS_");
#if defined USE_MUSL
/// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself.
return DB::SymbolIndex::instance().getResource(name_replaced);
#else
// In most `dlsym(3)` APIs, one passes the symbol name as it appears via
// something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
// looked up with the string `"_foo"`.
//
// Apple's linker is confusingly different. The NOTES on the man page for
// `dlsym(3)` claim that one looks up the symbol with "the name used in C
// source code". In this example, that would mean using the string `"foo"`.
// This apparently applies even in the case where the symbol did not originate
// from C source, such as the embedded binary resource files used here. So
// the symbol name must not have a leading `_` on Apple platforms. It's not
// clear how this applies to other symbols, such as those which _have_ a leading
// underscore in them by design, many leading underscores, etc.
#if defined OS_DARWIN
std::string prefix = "binary_";
#else
std::string prefix = "_binary_";
#endif
std::string symbol_name_start = prefix + name_replaced + "_start";
std::string symbol_name_end = prefix + name_replaced + "_end";
const char * sym_start = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
const char * sym_end = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));
if (sym_start && sym_end)
{
auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
return { sym_start, resource_size };
}
return {};
#endif
}


@ -1,7 +0,0 @@
#pragma once
#include <string_view>
/// Get resource from binary if exists. Otherwise return empty string view.
/// Resources are data that is embedded into executable at link time.
std::string_view getResource(std::string_view name);


@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
// {0, 0 + 11 * 3600 * 24 + 12, 11},
}))
);


@ -38,7 +38,6 @@
#include <base/coverage.h>
#include <base/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>


@ -1,22 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionToDecimalString.h>
#include <Functions/IFunction.h>
namespace DB
{
REGISTER_FUNCTION(ToDecimalString)
{
factory.registerFunction<FunctionToDecimalString>(
FunctionDocumentation{
.description=R"(
Returns string representation of a number. First argument is the number of any numeric type,
second argument is the desired number of digits in fractional part. Returns String.
)",
.examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
.categories{"String"}
}, FunctionFactory::CaseInsensitive);
}
}


@ -1,312 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <Core/DecimalFunctions.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
}
class FunctionToDecimalString : public IFunction
{
public:
static constexpr auto name = "toDecimalString";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToDecimalString>(); }
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isNumber(*arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal first argument for formatDecimal function: got {}, expected numeric type",
arguments[0]->getName());
if (!isUInt8(*arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal second argument for formatDecimal function: got {}, expected UInt8",
arguments[1]->getName());
return std::make_shared<DataTypeString>();
}
bool useDefaultImplementationForConstants() const override { return true; }
private:
/// For operations with Integer/Float
template <typename FromVectorType>
void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
/// Buffer is used here and in functions below because resulting size cannot be precisely anticipated,
/// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case.
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
for (size_t i = 0; i < input_rows_count; ++i)
{
format(vec_from[i], buf_to, precision);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(vec_from[i], buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(value_from, buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
/// For operations with Decimal
template <typename FirstArgVectorType>
void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
/// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77.
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
writeText(vec_from[i], from_scale, buf_to, true, true, precision);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <is_floating_point T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values,
/// Catch this here to give user a more reasonable error.
if (precision > 60)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
DB::DoubleConverter<false>::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);
if (!result)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
out.write(buffer, builder.position());
writeChar(0, out);
}
template <is_integer T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals).
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
writeText(value, out);
if (precision > 0) [[likely]]
{
writeChar('.', out);
for (int i = 0; i < precision; ++i)
writeChar('0', out);
writeChar(0, out);
}
}
public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
switch (arguments[0].type->getTypeId())
{
case TypeIndex::UInt8: return executeType<UInt8>(arguments);
case TypeIndex::UInt16: return executeType<UInt16>(arguments);
case TypeIndex::UInt32: return executeType<UInt32>(arguments);
case TypeIndex::UInt64: return executeType<UInt64>(arguments);
case TypeIndex::UInt128: return executeType<UInt128>(arguments);
case TypeIndex::UInt256: return executeType<UInt256>(arguments);
case TypeIndex::Int8: return executeType<Int8>(arguments);
case TypeIndex::Int16: return executeType<Int16>(arguments);
case TypeIndex::Int32: return executeType<Int32>(arguments);
case TypeIndex::Int64: return executeType<Int64>(arguments);
case TypeIndex::Int128: return executeType<Int128>(arguments);
case TypeIndex::Int256: return executeType<Int256>(arguments);
case TypeIndex::Float32: return executeType<Float32>(arguments);
case TypeIndex::Float64: return executeType<Float64>(arguments);
case TypeIndex::Decimal32: return executeType<Decimal32>(arguments);
case TypeIndex::Decimal64: return executeType<Decimal64>(arguments);
case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
}
}
private:
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
{
const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
auto result_col = ColumnString::create();
auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
ColumnString::Chars & result_chars = result_col_string->getChars();
ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
if constexpr (is_decimal<T>)
{
const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
UInt8 from_scale = from_col->getScale();
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_scale);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
else
{
const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
return result_col;
}
};
}


@ -1,9 +1,12 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
#include <memory>
#include <unordered_map>
namespace DB
{
@ -46,7 +49,7 @@ namespace
return res;
}
/// Сount how many times each bigram occurs in the text.
/// Count how many times each bigram occurs in the text.
template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats(
const UInt8 * data,
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
}
}
#endif


@ -5,19 +5,17 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/isValidUTF8.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
#include <Interpreters/Context.h>
#include <compact_lang_det.h>
namespace DB
{
/* Determine language of Unicode UTF-8 text.


@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
}
}
#endif


@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
}
}
#endif


@ -4,14 +4,18 @@
#include <mutex>
#include <Core/Types.h>
#include <Common/ProfileEvents.h>
#include <IO/OpenedFile.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/ProfileEvents.h>
#include <city.h>
namespace ProfileEvents
{
extern const Event OpenedFileCacheHits;
extern const Event OpenedFileCacheMisses;
extern const Event OpenedFileCacheMicroseconds;
}
namespace DB
@ -26,57 +30,79 @@ namespace DB
*/
class OpenedFileCache
{
private:
using Key = std::pair<std::string /* path */, int /* flags */>;
class OpenedFileMap
{
using Key = std::pair<std::string /* path */, int /* flags */>;
using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
using Files = std::map<Key, OpenedFileWeakPtr>;
using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
using Files = std::map<Key, OpenedFileWeakPtr>;
Files files;
std::mutex mutex;
Files files;
std::mutex mutex;
public:
using OpenedFilePtr = std::shared_ptr<OpenedFile>;
OpenedFilePtr get(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
if (!inserted)
{
if (auto res = it->second.lock())
{
ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
return res;
}
}
ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
OpenedFilePtr res
{
new OpenedFile(path, flags),
[key, this](auto ptr)
{
{
std::lock_guard another_lock(mutex);
files.erase(key);
}
delete ptr;
}
};
it->second = res;
return res;
}
void remove(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
files.erase(key);
}
};
static constexpr size_t buckets = 1024;
std::vector<OpenedFileMap> impls{buckets};
public:
using OpenedFilePtr = std::shared_ptr<OpenedFile>;
using OpenedFilePtr = OpenedFileMap::OpenedFilePtr;
OpenedFilePtr get(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
if (!inserted)
{
if (auto res = it->second.lock())
{
ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
return res;
}
}
ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
OpenedFilePtr res
{
new OpenedFile(path, flags),
[key, this](auto ptr)
{
{
std::lock_guard another_lock(mutex);
files.erase(key);
}
delete ptr;
}
};
it->second = res;
return res;
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
return impls[bucket].get(path, flags);
}
void remove(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
files.erase(key);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
impls[bucket].remove(path, flags);
}
static OpenedFileCache & instance()
@ -87,5 +113,4 @@ public:
};
using OpenedFileCachePtr = std::shared_ptr<OpenedFileCache>;
}
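
The rewrite above shards the previously global map-plus-mutex into 1024 OpenedFileMap buckets, selecting a bucket by CityHash64 of the file path, so that opens of unrelated files stop contending on a single lock; the new ProfileEvents::OpenedFileCacheMicroseconds counter times both get() and remove(). A minimal sketch of the same lock-sharding pattern, with illustrative names and std::hash standing in for CityHash64:

/// Sketch only, not the ClickHouse class: N independent (mutex, map) buckets chosen by a hash of the key.
#include <array>
#include <functional>
#include <map>
#include <memory>
#include <mutex>
#include <string>

template <typename Value, size_t Buckets = 1024>
class ShardedWeakCache
{
    struct Bucket
    {
        std::mutex mutex;
        std::map<std::string, std::weak_ptr<Value>> entries;
    };

    std::array<Bucket, Buckets> buckets;

    Bucket & bucketFor(const std::string & key)
    {
        return buckets[std::hash<std::string>{}(key) % Buckets];
    }

public:
    /// Return the cached value for `key`, or create one with `factory` (which must return Value *).
    template <typename Factory>
    std::shared_ptr<Value> getOrCreate(const std::string & key, Factory && factory)
    {
        Bucket & bucket = bucketFor(key);
        std::lock_guard lock(bucket.mutex); /// only this bucket is locked, not the whole cache
        if (auto cached = bucket.entries[key].lock())
            return cached;
        std::shared_ptr<Value> created(factory());
        bucket.entries[key] = created;
        return created;
    }
};

The production class additionally erases the entry from its bucket inside the shared_ptr deleter and wraps both operations in ProfileEventTimeIncrement<Microseconds>, exactly as shown in the hunk above.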

View File

@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }
template <typename T>
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length, UInt32 fractional_length)
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
/// If it's a big integer but the number of digits is small,
/// use the implementation for smaller integers for more efficient arithmetic.
if constexpr (std::is_same_v<T, Int256>)
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt128>::max())
{
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
return;
}
}
@ -932,36 +932,24 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
return;
}
}
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
assert(scale <= max_digits);
assert(fractional_length <= max_digits);
char buf[max_digits];
memset(buf, '0', std::max(scale, fractional_length));
memset(buf, '0', scale);
T value = x;
Int32 last_nonzero_pos = 0;
if (fixed_fractional_length && fractional_length < scale)
{
T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
auto round_carry = new_value % 10;
value = new_value / 10;
if (round_carry >= 5)
value += 1;
}
for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
for (Int32 pos = scale - 1; pos >= 0; --pos)
{
auto remainder = value % 10;
value /= 10;
@ -973,12 +961,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
}
writeChar('.', ostr);
ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
}
template <typename T>
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length = false, UInt32 fractional_length = 0)
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
T part = DecimalUtils::getWholePart(x, scale);
@ -989,7 +976,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
writeIntText(part, ostr);
if (scale || (fixed_fractional_length && fractional_length > 0))
if (scale)
{
part = DecimalUtils::getFractionalPart(x, scale);
if (part || trailing_zeros)
@ -997,7 +984,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
if (part < 0)
part *= T(-1);
writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(part, scale, ostr, trailing_zeros);
}
}
}
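
After dropping the fixed_fractional_length machinery, the remaining algorithm is: pre-fill a buffer of `scale` characters with '0', peel decimal digits off the least significant end while remembering the last non-zero position, then print either all `scale` digits or only up to that position, depending on `trailing_zeros`. A standalone sketch of that digit-emission loop, using plain uint64_t and std::string instead of the ClickHouse wide-integer types and WriteBuffer:

/// Illustrative re-implementation of the fractional-part formatting loop; not the ClickHouse function itself.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>

std::string formatFractional(uint64_t fractional, uint32_t scale, bool trailing_zeros)
{
    assert(scale <= 20); /// max decimal digits of uint64_t
    std::string buf(scale, '0');
    int32_t last_nonzero_pos = 0;
    for (int32_t pos = static_cast<int32_t>(scale) - 1; pos >= 0; --pos)
    {
        auto remainder = fractional % 10;
        fractional /= 10;
        if (remainder != 0 && last_nonzero_pos == 0)
            last_nonzero_pos = pos;
        buf[pos] = static_cast<char>('0' + remainder);
    }
    return "." + buf.substr(0, trailing_zeros ? scale : last_nonzero_pos + 1);
}

int main()
{
    std::cout << formatFractional(12300, 7, false) << '\n'; /// ".00123"
    std::cout << formatFractional(12300, 7, true) << '\n';  /// ".0012300"
}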

View File

@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
}
else if (const auto * explain_query = ast->as<ASTExplainQuery>())
{
if (explain_query->settings_ast)
InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
}
else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))

View File

@ -3,6 +3,7 @@
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserDescribeTableQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Common/typeid_cast.h>
@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
ParserKeyword s_describe("DESCRIBE");
ParserKeyword s_desc("DESC");
ParserKeyword s_table("TABLE");
ParserKeyword s_settings("SETTINGS");
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserSetQuery parser_settings(true);
ASTPtr database;
ASTPtr table;
@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
s_table.ignore(pos, expected);
ASTPtr table_expression;
if (!ParserTableExpression().parse(pos, table_expression, expected))
if (!ParserTableExpression().parse(pos, query->table_expression, expected))
return false;
query->children.push_back(std::move(table_expression));
query->table_expression = query->children.back();
/// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT
ASTPtr settings;
if (s_settings.ignore(pos, expected))
{
if (!parser_settings.parse(pos, query->settings_ast, expected))
return false;
}
query->children.push_back(query->table_expression);
if (query->settings_ast)
query->children.push_back(query->settings_ast);
node = query;

View File

@ -156,7 +156,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
// SETTINGS key1 = value1, key2 = value2, ...
ParserKeyword s_settings("SETTINGS");
if (s_settings.ignore(pos, expected))
if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
{
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))

View File

@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
ParserKeyword s_exists("EXISTS");
ParserKeyword s_temporary("TEMPORARY");
ParserKeyword s_describe("DESCRIBE");
ParserKeyword s_desc("DESC");
ParserKeyword s_show("SHOW");
ParserKeyword s_create("CREATE");
ParserKeyword s_database("DATABASE");

View File

@ -6,10 +6,18 @@
#include <Poco/Util/LayeredConfiguration.h>
#include <IO/HTTPCommon.h>
#include <Common/getResource.h>
#include <re2/re2.h>
#include <incbin.h>
#include "config.h"
/// Embedded HTML pages
INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html");
INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html");
INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js");
namespace DB
{
@ -34,13 +42,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
if (request.getURI().starts_with("/play"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("play.html");
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
}
else if (request.getURI().starts_with("/dashboard"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
std::string html(getResource("dashboard.html"));
std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
/// Replace the link to the external JavaScript file with the embedded one.
/// This allows the HTML to be opened without a running server and also to be hosted on one.
@ -55,7 +63,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
else if (request.getURI() == "/js/uplot.js")
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("js/uplot.js");
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
}
else
{

View File

@ -60,7 +60,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/InterpreterInsertQuery.h>
@ -75,6 +74,7 @@
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
@ -434,7 +434,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
{
/// Always calculate the optimized cluster here, to avoid extra conditions during read()
/// (it will be calculated in read() anyway).
ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info);
if (optimized_cluster)
{
LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
@ -1297,7 +1297,7 @@ ClusterPtr StorageDistributed::getCluster() const
}
ClusterPtr StorageDistributed::getOptimizedCluster(
ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
{
ClusterPtr cluster = getCluster();
const Settings & settings = local_context->getSettingsRef();
@ -1306,7 +1306,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
if (has_sharding_key && sharding_key_is_usable)
{
ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context);
ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context);
if (optimized)
return optimized;
}
@ -1355,25 +1355,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c
/// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
ClusterPtr StorageDistributed::skipUnusedShards(
ClusterPtr cluster,
const ASTPtr & query_ptr,
const SelectQueryInfo & query_info,
const StorageSnapshotPtr & storage_snapshot,
ContextPtr local_context) const
{
const auto & select = query_ptr->as<ASTSelectQuery &>();
const auto & select = query_info.query->as<ASTSelectQuery &>();
if (!select.prewhere() && !select.where())
{
return nullptr;
}
/// FIXME: support analyzer
if (!query_info.syntax_analyzer_result)
return nullptr;
ASTPtr condition_ast;
if (select.prewhere() && select.where())
/// Remove JOIN from the query, since it may contain conditions on other tables;
/// only conditions on the left table should be analyzed for shard skipping.
{
condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
}
else
{
condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
ASTPtr select_without_join_ptr = select.clone();
ASTSelectQuery select_without_join = select_without_join_ptr->as<ASTSelectQuery &>();
TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result;
removeJoin(select_without_join, analyzer_result_without_join, local_context);
if (!select_without_join.prewhere() && !select_without_join.where())
return nullptr;
if (select_without_join.prewhere() && select_without_join.where())
condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone());
else
condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone();
}
replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
@ -1396,11 +1405,9 @@ ClusterPtr StorageDistributed::skipUnusedShards(
return nullptr;
}
// Can't get definite answer if we can skip any shards
// Can't get a definite answer if we can skip any shards
if (!blocks)
{
return nullptr;
}
std::set<int> shards;

View File

@ -182,10 +182,10 @@ private:
/// Apply the following settings:
/// - optimize_skip_unused_shards
/// - force_optimize_skip_unused_shards
ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const;
ClusterPtr skipUnusedShards(
ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
/// This method returns optimal query processing stage.
///

View File

@ -30,7 +30,6 @@ endif()
add_dependencies(generate-source generate-contributors)
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
add_custom_command(
OUTPUT StorageSystemLicenses.generated.cpp
@ -38,23 +37,13 @@ add_custom_command(
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
# Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
TARGET information_schema_metadata
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
RESOURCES schemata.sql tables.sql views.sql columns.sql
)
list (SORT storages_system_sources) # Reproducible build
add_library(clickhouse_storages_system ${storages_system_sources})
add_dependencies(clickhouse_storages_system information_schema_metadata)
target_link_libraries(clickhouse_storages_system PRIVATE
dbms
common
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
clickhouse_common_zookeeper
clickhouse_parsers
Poco::JSON
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
)
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)

View File

@ -3,14 +3,23 @@
#include <Storages/System/attachSystemTablesImpl.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Common/getResource.h>
#include <incbin.h>
#include "config.h"
/// Embedded SQL definitions
INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");
INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");
namespace DB
{
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
{
try
{
@ -21,12 +30,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
String metadata_resource_name = view_name + ".sql";
auto attach_query = getResource(metadata_resource_name);
if (attach_query.empty())
if (query.empty())
return;
ParserCreateQuery parser;
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
"Attach query from embedded resource " + metadata_resource_name,
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
@ -50,10 +58,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
{
createInformationSchemaView(context, information_schema_database, "schemata");
createInformationSchemaView(context, information_schema_database, "tables");
createInformationSchemaView(context, information_schema_database, "views");
createInformationSchemaView(context, information_schema_database, "columns");
createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
}
}
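
Both the web UI handler and attachInformationSchema above follow the same incbin pattern that replaces the old getResource()/clickhouse_embed_binaries machinery: INCBIN(name, path) embeds the file's bytes at build time and exposes them through the generated g<name>Data and g<name>Size symbols, which are then wrapped in a std::string_view. A minimal standalone sketch of that pattern; the resource name and file path are illustrative, and in the ClickHouse sources the paths are built from SOURCE_DIR, which the build defines (see the set(SOURCE_DIR ...) addition further below):

// Illustrative use of graphitemaster/incbin; not a ClickHouse source file.
#include <incbin.h>
#include <cstdio>
#include <string_view>

// Embeds motd.txt (hypothetical file) into the binary and defines:
//   gresource_motdData (const unsigned char[]), gresource_motdSize, gresource_motdEnd.
INCBIN(resource_motd, "motd.txt");

int main()
{
    std::string_view motd(reinterpret_cast<const char *>(gresource_motdData), gresource_motdSize);
    std::fwrite(motd.data(), 1, motd.size(), stdout);
}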

View File

@ -2,7 +2,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>

View File

@ -162,3 +162,5 @@ endif ()
if (TARGET ch_contrib::fiu)
set(FIU_ENABLE 1)
endif()
set(SOURCE_DIR ${CMAKE_SOURCE_DIR})

View File

@ -130,4 +130,6 @@
02581_share_big_sets_between_mutation_tasks_long
02581_share_big_sets_between_multiple_mutations_tasks_long
00992_system_parts_race_condition_zookeeper_long
02790_optimize_skip_unused_shards_join
01940_custom_tld_sharding_key
02815_range_dict_no_direct_join

View File

@ -3199,6 +3199,7 @@ class ClickHouseInstance:
):
self.name = name
self.base_cmd = cluster.base_cmd
self.base_dir = base_path
self.docker_id = cluster.get_instance_docker_id(self.name)
self.cluster = cluster
self.hostname = hostname if hostname is not None else self.name
@ -4193,6 +4194,14 @@ class ClickHouseInstance:
["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
)
def put_users_config(self, config_path):
"""Put new config (useful if you cannot put it at the start)"""
instance_config_dir = p.abspath(p.join(self.path, "configs"))
users_d_dir = p.abspath(p.join(instance_config_dir, "users.d"))
config_path = p.join(self.base_dir, config_path)
shutil.copy(config_path, users_d_dir)
def create_dir(self):
"""Create the instance directory and all the needed files there."""

View File

@ -114,7 +114,10 @@ def node_update_config(mode, setting, value=None):
def assert_took(took, should_took):
assert took >= should_took[0] * 0.9 and took < should_took[1]
# We need to lower the bottom bound because the server limits could be
# enforced by throttling some background IO on the server instead of query IO,
# and we have no control over that.
assert took >= should_took[0] * 0.85 and took < should_took[1]
@pytest.mark.parametrize(

View File

@ -0,0 +1,7 @@
<clickhouse>
<profiles>
<default>
<force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop>
</default>
</profiles>
</clickhouse>

View File

@ -51,6 +51,12 @@ def start_cluster():
cluster.shutdown()
def restart_node(node):
# Set force_remove_data_recursively_on_drop (it cannot be set earlier, because the old server version does not know this setting).
node.put_users_config("configs/force_remove_data_recursively_on_drop.xml")
node.restart_with_latest_version(signal=9, fix_metadata=True)
def test_mutate_and_upgrade(start_cluster):
for node in [node1, node2]:
node.query("DROP TABLE IF EXISTS mt")
@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):
node2.query("DETACH TABLE mt") # stop being leader
node1.query("DETACH TABLE mt") # stop being leader
node1.restart_with_latest_version(signal=9, fix_metadata=True)
node2.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node1)
restart_node(node2)
# After hard restart table can be in readonly mode
exec_query_with_retry(
@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
# (We could be in the process of creating some system table, which would leave an empty directory behind on restart,
# so when we start moving system tables from the ordinary to the atomic database, it would complain about undeleted files)
node3.query("SYSTEM FLUSH LOGS")
node3.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node3)
# checks for readonly
exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)

View File

@ -238,10 +238,6 @@ defaultValueOfArgumentType
defaultValueOfTypeName
degrees
demangle
detectCharset
detectLanguageUnknown
detectProgrammingLanguage
detectTonality
divide
dotProduct
dumpColumnStructure

View File

@ -15,5 +15,7 @@ AND name NOT IN (
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
'lemmatize', 'tokenize', 'stem', 'synonyms',
'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
-- these functions are not enabled in fast test
) ORDER BY name;

View File

@ -1,21 +0,0 @@
2.00000000000000000000000000000000000000000000000000000000000000000000000000000
2.12
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
2.987600000000000033395508580724708735942840576171875000000000
2.15
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
64.1230010986
64.2340000000
-64.1230010986
-64.2340000000
-32.345
32.34500000000000000000000000000000000000000000000000000000000000000000000000000
32.46
-64.5671232345
128.78932312332132985464
-128.78932312332132985464
128.78932312332132985464000000000000000000000000000000000000000000000000000000000
128.7893231233
-128.78932312332132985464123123789323123321329854600000000000000000000000000000000

View File

@ -1,35 +0,0 @@
-- Regular types
SELECT toDecimalString(2, 77); -- more digits required than exist
SELECT toDecimalString(2.123456, 2); -- rounding
SELECT toDecimalString(-2, 77); -- more digits required than exist
SELECT toDecimalString(-2.123456, 2); -- rounding
SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
SELECT toDecimalString(2.1456, 2); -- rounding
SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
SELECT toDecimalString(-2.1456, 2); -- rounding
-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
SELECT toDecimalString(64.123::Float32, 10);
SELECT toDecimalString(64.234::Float64, 10);
SELECT toDecimalString(-64.123::Float32, 10);
SELECT toDecimalString(-64.234::Float64, 10);
-- Decimals
SELECT toDecimalString(-32.345::Decimal32(3), 3);
SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist
-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
-- These values shall work OK.
SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}

View File

@ -0,0 +1,10 @@
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""

View File

@ -0,0 +1,3 @@
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 FORMAT CSV;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 SETTINGS max_threads=0; -- { clientError SYNTAX_ERROR }

View File

@ -0,0 +1,55 @@
-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995
DROP TABLE IF EXISTS outer;
DROP TABLE IF EXISTS inner;
DROP TABLE IF EXISTS outer_distributed;
DROP TABLE IF EXISTS inner_distributed;
CREATE TABLE IF NOT EXISTS outer
(
`id` UInt64,
`organization_id` UInt64,
`version` UInt64
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY organization_id % 8
ORDER BY (organization_id, id);
CREATE TABLE inner
(
`id` UInt64,
`outer_id` UInt64,
`organization_id` UInt64,
`version` UInt64,
`date` Date
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY toYYYYMM(date)
ORDER BY (organization_id, outer_id);
CREATE TABLE inner_distributed AS inner
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id));
CREATE TABLE outer_distributed AS outer
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id));
SELECT
sum(if(inner_distributed.id != 0, 1, 0)) AS total,
inner_distributed.date AS date
FROM outer_distributed AS outer_distributed
FINAL
LEFT JOIN
(
SELECT
inner_distributed.outer_id AS outer_id,
inner_distributed.id AS id,
inner_distributed.date AS date
FROM inner_distributed AS inner_distributed
FINAL
WHERE inner_distributed.organization_id = 15078
) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id
WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01'))
GROUP BY date
ORDER BY date DESC
SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1;

View File

@ -0,0 +1,11 @@
explain
(Expression)
ExpressionTransform
(Aggregating)
FinalizeAggregatedTransform
AggregatingInOrderTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeInOrder 0 → 1

View File

@ -0,0 +1,18 @@
SET read_in_order_two_level_merge_threshold=1000000;
DROP TABLE IF EXISTS t;
CREATE TABLE t(a UInt64)
ENGINE = MergeTree
ORDER BY a;
INSERT INTO t SELECT * FROM numbers_mt(1e3);
OPTIMIZE TABLE t FINAL;
EXPLAIN PIPELINE
SELECT a
FROM t
GROUP BY a
FORMAT PrettySpace
SETTINGS optimize_aggregation_in_order = 1;
DROP TABLE t;