Merge remote-tracking branch 'origin/master' into ADQM-940

This commit is contained in:
Dmitry Kardymon 2023-07-24 20:04:10 +00:00
commit d4d381de7e
100 changed files with 1472 additions and 1173 deletions

3
.gitmodules vendored
View File

@ -340,3 +340,6 @@
[submodule "contrib/c-ares"]
path = contrib/c-ares
url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
path = contrib/incbin
url = https://github.com/graphitemaster/incbin.git

View File

@ -1,58 +0,0 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
# 2. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
macro(clickhouse_embed_binaries)
set(one_value_args TARGET RESOURCE_DIR)
set(resources RESOURCES)
cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
if (NOT DEFINED EMBED_TARGET)
message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
endif()
if (NOT DEFINED EMBED_RESOURCE_DIR)
set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
endif()
list(LENGTH EMBED_RESOURCES N_RESOURCES)
if (N_RESOURCES LESS 1)
message(FATAL_ERROR "The list of binary resources to embed may not be empty")
endif()
add_library("${EMBED_TARGET}" STATIC)
set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
foreach(RESOURCE_FILE ${EMBED_RESOURCES})
set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
set(BINARY_FILE_NAME "${RESOURCE_FILE}")
# Normalize the name of the resource.
string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
# Generate the configured assembly file in the output directory.
configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
# Set the include directory for relative paths specified for `.incbin` directive.
set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
endforeach()
endmacro()

View File

@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
add_contrib (libstemmer-c-cmake libstemmer_c)
add_contrib (wordnet-blast-cmake wordnet-blast)
add_contrib (lemmagen-c-cmake lemmagen-c)
add_contrib (nlp-data-cmake nlp-data)
add_contrib (cld2-cmake cld2)
endif()

View File

@ -1,4 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
set (SRCS
@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif ()
# Related to time_zones table:
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
# as the library that's built using embedded tzdata is also specific to OS_LINUX
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# TimeZones.generated.cpp is autogenerated each time during a build
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# remove existing copies so that its generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(REMOVE ${TIMEZONES_FILE})
# get the list of timezones from tzdata shipped with cctz
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
set(TIMEZONE_RESOURCE_FILES)
# each file in that dir (except of tab and localtime) store the info about timezone
execute_process(COMMAND
bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
OUTPUT_STRIP_TRAILING_WHITESPACE
OUTPUT_VARIABLE TIMEZONES)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
TARGET tzdata
RESOURCE_DIR "${TZDIR}"
RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(_cctz tzdata)
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")
file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${TIMEZONES_FILE} " return {};\n")
file(APPEND ${TIMEZONES_FILE} "}\n")
add_library (tzdata ${TIMEZONES_FILE})
target_link_libraries(tzdata ch_contrib::incbin)
target_link_libraries(_cctz tzdata)
add_library(ch_contrib::cctz ALIAS _cctz)

1
contrib/incbin vendored Submodule

@ -0,0 +1 @@
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf

View File

@ -0,0 +1,8 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
add_library(_incbin INTERFACE)
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
add_library(ch_contrib::incbin ALIAS _incbin)
# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)

View File

@ -1,15 +0,0 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")
add_library (_nlp_data INTERFACE)
clickhouse_embed_binaries(
TARGET nlp_dictionaries
RESOURCE_DIR "${LIBRARY_DIR}"
RESOURCES charset.zst tonality_ru.zst programming.zst
)
add_dependencies(_nlp_data nlp_dictionaries)
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
add_library(ch_contrib::nlp_data ALIAS _nlp_data)

View File

@ -147,6 +147,7 @@ function clone_submodules
contrib/simdjson
contrib/liburing
contrib/libfiu
contrib/incbin
)
git submodule sync

View File

@ -4,6 +4,9 @@
set -e -x -a
# Choose random timezone for this test run.
#
# NOTE: that clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

View File

@ -945,44 +945,6 @@ Result:
└────────────┴───────┘
```
## toDecimalString
Converts a numeric value to String with the number of fractional digits in the output specified by the user.
**Syntax**
``` sql
toDecimalString(number, scale)
```
**Parameters**
- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md),
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
* Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal),
* Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.
**Returned value**
- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale).
The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale.
**Example**
Query:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Result:
```response
┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64)
## reinterpretAsInt(8\|16\|32\|64)

View File

@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
└────────────┴───────┘
```
## toDecimalString
Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом.
**Синтаксис**
``` sql
toDecimalString(number, scale)
```
**Параметры**
- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md),
- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
* Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов),
* Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60.
**Возвращаемое значение**
- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части.
При необходимости число округляется по стандартным правилам арифметики.
**Пример использования**
Запрос:
``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```
Результат:
```response
┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐
│ 64.32000 │
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}

View File

@ -20,10 +20,7 @@
#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
@ -35,6 +32,14 @@
#include <Poco/Util/XMLConfiguration.h>
#include <incbin.h>
#include "config.h"
/// Embedded configuration files used inside the install program
INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");
/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
* It also allows to avoid dependency on systemd, upstart, SysV init.
@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(main_config_file))
{
std::string_view main_config_content = getResource("config.xml");
std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
if (main_config_content.empty())
{
fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
if (!fs::exists(users_config_file))
{
std::string_view users_config_content = getResource("users.xml");
std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
if (users_config_content.empty())
{
fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());

View File

@ -1,16 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
if (OS_LINUX)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
# for some reason INTERFACE linkage doesn't work for standalone binary
set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()
clickhouse_embed_binaries(
TARGET clickhouse_keeper_configs
RESOURCES keeper_config.xml keeper_embedded.xml
)
set(CLICKHOUSE_KEEPER_SOURCES
Keeper.cpp
)
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper)
install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER)
# Straight list of all required sources
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (SPLIT_DEBUG_SYMBOLS)

View File

@ -457,8 +457,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
if (!key_path.empty()) extra_paths.emplace_back(key_path);
if (!cert_path.empty())
extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
/// ConfigReloader have to strict parameters which are redundant in our case
auto main_config_reloader = std::make_unique<ConfigReloader>(

View File

@ -1,12 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(CLICKHOUSE_SERVER_SOURCES
MetricsTransmitter.cpp
Server.cpp
)
set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")
set (CLICKHOUSE_SERVER_LINK
PRIVATE
clickhouse_aggregate_functions
@ -34,9 +30,3 @@ endif()
clickhouse_program_add(server)
install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)
clickhouse_embed_binaries(
TARGET clickhouse_server_configs
RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)

View File

@ -128,6 +128,10 @@
# include <azure/storage/common/internal/xml_wrapper.hpp>
#endif
#include <incbin.h>
/// A minimal file used when the server is run without installation
INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml");
namespace CurrentMetrics
{
extern const Metric Revision;
@ -393,6 +397,7 @@ int Server::run()
void Server::initialize(Poco::Util::Application & self)
{
ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
BaseDaemon::initialize(self);
logger().information("starting up");
@ -1106,8 +1111,10 @@ try
const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");
std::vector<std::string> extra_paths = {include_from_path};
if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
if (!key_path.empty()) extra_paths.emplace_back(key_path);
if (!cert_path.empty())
extra_paths.emplace_back(cert_path);
if (!key_path.empty())
extra_paths.emplace_back(key_path);
auto main_config_reloader = std::make_unique<ConfigReloader>(
config_path,

View File

View File

@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)
add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper)
@ -561,7 +561,6 @@ if (ENABLE_NLP)
dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
endif()
if (TARGET ch_contrib::ulid)

View File

@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
endif()
if (ENABLE_MYSQL)
add_subdirectory (mysqlxx)
add_subdirectory(mysqlxx)
endif ()

View File

@ -19,7 +19,6 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/getResource.h>
#include <Common/XMLUtils.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
Poco::Logger::destroy("ConfigProcessor");
}
static std::unordered_map<std::string, std::string_view> embedded_configs;
void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
{
embedded_configs[name] = content;
}
/// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes).
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
{
std::string value = node->nodeValue();
bool replace_occured = false;
bool replace_occurred = false;
size_t pos;
while ((pos = value.find(substitution.first)) != std::string::npos)
{
value.replace(pos, substitution.first.length(), substitution.second);
replace_occured = true;
replace_occurred = true;
}
if (replace_occured)
if (replace_occurred)
node->setNodeValue(value);
}
}
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
}
else
{
/// These embedded files added during build with some cmake magic.
/// Look at the end of programs/server/CMakeLists.txt.
std::string embedded_name;
if (path == "config.xml")
embedded_name = "embedded.xml";
if (path == "keeper_config.xml")
embedded_name = "keeper_embedded.xml";
/// When we can use config embedded in binary.
if (!embedded_name.empty())
/// When we can use a config embedded in the binary.
if (auto it = embedded_configs.find(path); it != embedded_configs.end())
{
auto resource = getResource(embedded_name);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
config = dom_parser.parseMemory(resource.data(), resource.size());
config = dom_parser.parseMemory(it->second.data(), it->second.size());
}
else
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
}
std::vector<std::string> contributing_files;

View File

@ -65,6 +65,9 @@ public:
zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
const zkutil::EventPtr & zk_changed_event = nullptr);
/// These configurations will be used if there is no configuration file.
static void registerEmbeddedConfig(std::string name, std::string_view content);
/// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
/// The resulting XML document is saved into a file with the name

View File

@ -3,7 +3,6 @@
#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <cctz/zone_info_source.h>
#include <Common/getResource.h>
#include <Poco/Exception.h>
#include <algorithm>
@ -11,6 +10,11 @@
#include <chrono>
#include <cstring>
#include <memory>
#include <iostream>
/// Embedded timezones.
std::string_view getTimeZone(const char * name);
namespace
@ -249,9 +253,10 @@ namespace cctz_extension
const std::string & name,
const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
{
std::string_view resource = getResource(name);
if (!resource.empty())
return std::make_unique<Source>(resource.data(), resource.size());
std::string_view tz_file = getTimeZone(name.data());
if (!tz_file.empty())
return std::make_unique<Source>(tz_file.data(), tz_file.size());
return fallback(name);
}

View File

@ -0,0 +1,185 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <incbin.h>
/// Embedded SQL definitions
INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst");
INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst");
INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst");
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
FrequencyHolder & FrequencyHolder::getInstance()
{
static FrequencyHolder instance;
return instance;
}
FrequencyHolder::FrequencyHolder()
{
loadEmotionalDict();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void FrequencyHolder::loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void FrequencyHolder::loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void FrequencyHolder::loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
}
#endif

View File

@ -1,5 +1,9 @@
#pragma once
#include "config.h"
#if USE_NLP
#include <base/StringRef.h>
#include <Common/logger_useful.h>
@ -7,7 +11,6 @@
#include <unordered_map>
#include <Common/Arena.h>
#include <Common/getResource.h>
#include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h>
@ -20,11 +23,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
}
/// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions:
///
@ -56,11 +54,7 @@ public:
using EncodingMap = HashMap<UInt16, Float64>;
using EncodingContainer = std::vector<Encoding>;
static FrequencyHolder & getInstance()
{
static FrequencyHolder instance;
return instance;
}
static FrequencyHolder & getInstance();
const Map & getEmotionalDict() const
{
@ -78,161 +72,11 @@ public:
}
private:
FrequencyHolder();
FrequencyHolder()
{
loadEmotionalDict();
loadEncodingsFrequency();
loadProgrammingFrequency();
}
void loadEncodingsFrequency()
{
Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
LOG_TRACE(log, "Loading embedded charset frequencies");
auto resource = getResource("charset.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
String line;
UInt16 bigram;
Float64 frequency;
String charset_name;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new charset
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(charset_name, buf_line);
/* In our dictionary we have lines with form: <Language>_<Charset>
* If we need to find language of data, we return <Language>
* If we need to find charset of data, we return <Charset>.
*/
size_t sep = charset_name.find('_');
Encoding enc;
enc.lang = charset_name.substr(0, sep);
enc.name = charset_name.substr(sep + 1);
encodings_freq.push_back(std::move(enc));
}
else
{
readIntText(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
encodings_freq.back().map[bigram] = frequency;
}
}
LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}
void loadEmotionalDict()
{
Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
LOG_TRACE(log, "Loading embedded emotional dictionary");
auto resource = getResource("tonality_ru.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");
String line;
String word;
Float64 tonality;
size_t count = 0;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
readStringUntilWhitespace(word, buf_line);
buf_line.ignore();
readFloatText(tonality, buf_line);
StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
emotional_dict[ref] = tonality;
++count;
}
LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}
void loadProgrammingFrequency()
{
Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");
LOG_TRACE(log, "Loading embedded programming languages frequencies loading");
auto resource = getResource("programming.zst");
if (resource.empty())
throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");
String line;
String bigram;
Float64 frequency;
String programming_language;
auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
ZstdInflatingReadBuffer in(std::move(buf));
while (!in.eof())
{
readString(line, in);
in.ignore();
if (line.empty())
continue;
ReadBufferFromString buf_line(line);
// Start loading a new language
if (line.starts_with("// "))
{
// Skip "// "
buf_line.ignore(3);
readString(programming_language, buf_line);
Language lang;
lang.name = programming_language;
programming_freq.push_back(std::move(lang));
}
else
{
readStringUntilWhitespace(bigram, buf_line);
buf_line.ignore();
readFloatText(frequency, buf_line);
StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
programming_freq.back().map[ref] = frequency;
}
}
LOG_TRACE(log, "Programming languages frequencies was added");
}
void loadEncodingsFrequency();
void loadEmotionalDict();
void loadProgrammingFrequency();
Arena string_pool;
@ -241,3 +85,5 @@ private:
EncodingContainer encodings_freq;
};
}
#endif

View File

@ -45,6 +45,7 @@
M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \
M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \
M(OpenedFileCacheMisses, "Number of times a file has been found in the opened file cache, so we had to open it again.") \
M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \
M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \
M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \
M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \

View File

@ -87,50 +87,13 @@ namespace
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object
void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
{
const char * char_address = static_cast<const char *>(address);
if (name.starts_with("_binary_") || name.starts_with("binary_"))
{
if (name.ends_with("_start"))
{
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_start"));
auto & resource = resources[name];
if (!resource.base_address || resource.base_address == base_address)
{
resource.base_address = base_address;
resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
resource.object_name = object_name;
}
}
if (name.ends_with("_end"))
{
name = name.substr((name[0] == '_') + strlen("binary_"));
name = name.substr(0, name.size() - strlen("_end"));
auto & resource = resources[name];
if (!resource.base_address || resource.base_address == base_address)
{
resource.base_address = base_address;
resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
resource.object_name = object_name;
}
}
}
}
/// Based on the code of musl-libc and the answer of Kanalpiroge on
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
/// but will work if we cannot find or parse ELF files.
void collectSymbolsFromProgramHeaders(
dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
/* Iterate over all headers of the current shared lib
* (first call is for the executable itself)
@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders(
/// We are not interested in empty symbols.
if (elf_sym[sym_index].st_size)
symbols.push_back(symbol);
/// But resources can be represented by a pair of empty symbols (indicating their boundaries).
updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
break;
@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable(
const Elf & elf,
const Elf::Section & symbol_table,
const Elf::Section & string_table,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
/// Iterate symbol table.
const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable(
if (symbol_table_entry->st_size)
symbols.push_back(symbol);
updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
}
}
@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
const Elf & elf,
unsigned section_header_type,
const char * string_table_name,
std::vector<SymbolIndex::Symbol> & symbols,
SymbolIndex::Resources & resources)
std::vector<SymbolIndex::Symbol> & symbols)
{
std::optional<Elf::Section> symbol_table;
std::optional<Elf::Section> string_table;
@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
return false;
}
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
return true;
}
@ -351,7 +307,6 @@ void collectSymbolsFromELF(
dl_phdr_info * info,
std::vector<SymbolIndex::Symbol> & symbols,
std::vector<SymbolIndex::Object> & objects,
SymbolIndex::Resources & resources,
String & build_id)
{
String object_name;
@ -462,11 +417,11 @@ void collectSymbolsFromELF(
object.name = object_name;
objects.push_back(std::move(object));
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);
/// Unneeded if they were parsed from "program headers" of loaded objects.
#if defined USE_MUSL
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
#endif
}
@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
{
SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);
collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
collectSymbolsFromProgramHeaders(info, data.symbols);
collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);
/* Continue iterations */
return 0;

View File

@ -8,6 +8,7 @@
#include <Common/Elf.h>
#include <boost/noncopyable.hpp>
namespace DB
{
@ -45,44 +46,15 @@ public:
const std::vector<Symbol> & symbols() const { return data.symbols; }
const std::vector<Object> & objects() const { return data.objects; }
std::string_view getResource(String name) const
{
if (auto it = data.resources.find(name); it != data.resources.end())
return it->second.data();
return {};
}
/// The BuildID that is generated by compiler.
String getBuildID() const { return data.build_id; }
String getBuildIDHex() const;
struct ResourcesBlob
{
/// Symbol can be presented in multiple shared objects,
/// base_address will be used to compare only symbols from the same SO.
ElfW(Addr) base_address = 0;
/// Just a human name of the SO.
std::string_view object_name;
/// Data blob.
std::string_view start;
std::string_view end;
std::string_view data() const
{
assert(end.data() >= start.data());
return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
}
};
using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;
struct Data
{
std::vector<Symbol> symbols;
std::vector<Object> objects;
String build_id;
/// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
Resources resources;
};
private:
Data data;

View File

@ -59,3 +59,7 @@
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT
/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
/// That's why we use absolute paths.
#cmakedefine SOURCE_DIR "@SOURCE_DIR@"

View File

@ -1,52 +0,0 @@
#include "getResource.h"
#include <dlfcn.h>
#include <string>
#include <boost/algorithm/string/replace.hpp>
#include <Common/SymbolIndex.h>
std::string_view getResource(std::string_view name)
{
// Convert the resource file name into the form generated by `ld -r -b binary`.
std::string name_replaced(name);
std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
boost::replace_all(name_replaced, "+", "_PLUS_");
#if defined USE_MUSL
/// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself.
return DB::SymbolIndex::instance().getResource(name_replaced);
#else
// In most `dlsym(3)` APIs, one passes the symbol name as it appears via
// something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
// looked up with the string `"_foo"`.
//
// Apple's linker is confusingly different. The NOTES on the man page for
// `dlsym(3)` claim that one looks up the symbol with "the name used in C
// source code". In this example, that would mean using the string `"foo"`.
// This apparently applies even in the case where the symbol did not originate
// from C source, such as the embedded binary resource files used here. So
// the symbol name must not have a leading `_` on Apple platforms. It's not
// clear how this applies to other symbols, such as those which _have_ a leading
// underscore in them by design, many leading underscores, etc.
#if defined OS_DARWIN
std::string prefix = "binary_";
#else
std::string prefix = "_binary_";
#endif
std::string symbol_name_start = prefix + name_replaced + "_start";
std::string symbol_name_end = prefix + name_replaced + "_end";
const char * sym_start = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
const char * sym_end = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));
if (sym_start && sym_end)
{
auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
return { sym_start, resource_size };
}
return {};
#endif
}

View File

@ -1,7 +0,0 @@
#pragma once
#include <string_view>
/// Get resource from binary if exists. Otherwise return empty string view.
/// Resources are data that is embedded into executable at link time.
std::string_view getResource(std::string_view name);

View File

@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x)
return res;
}
String backQuoteMySQL(StringRef x)
{
String res(x.size, '\0');
{
WriteBufferFromString wb(res);
writeBackQuotedStringMySQL(x, wb);
}
return res;
}
}

View File

@ -24,4 +24,7 @@ String backQuote(StringRef x);
/// Quote the identifier with backquotes, if required.
String backQuoteIfNeed(StringRef x);
/// Quote the identifier with backquotes, for use in MySQL queries.
String backQuoteMySQL(StringRef x);
}

View File

@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
// {0, 0 + 11 * 3600 * 24 + 12, 11},
}))
);

View File

@ -38,7 +38,6 @@
#include <base/coverage.h>
#include <base/sleep.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>

View File

@ -4,6 +4,7 @@
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <cstdlib>
#include <random>
#include <string_view>
@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection,
{ std::make_shared<DataTypeString>(), "column_type" }
};
const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
" WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) +
"' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION";
String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
" WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION";
StreamSettings mysql_input_stream_settings(global_settings, false, true);
auto mysql_source = std::make_unique<MySQLSource>(connection, query, tables_columns_sample_block, mysql_input_stream_settings);
@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
CurrentThread::QueryScope query_scope(query_context);
String query = query_event.query;
tryQuoteUnrecognizedTokens(query, query);
if (!materialized_tables_list.empty())
{
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);

View File

@ -0,0 +1,289 @@
#include <gtest/gtest.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
using namespace DB;
struct TestCase
{
String query;
String res;
bool ok;
TestCase(
const String & query_,
const String & res_,
bool ok_)
: query(query_)
, res(res_)
, ok(ok_)
{
}
};
std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case)
{
return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok;
}
class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam<TestCase>
{
};
TEST_P(QuoteUnrecognizedTokensTest, escape)
{
const auto & [query, expected, ok] = GetParam();
String actual;
bool res = tryQuoteUnrecognizedTokens(query, actual);
EXPECT_EQ(ok, res);
EXPECT_EQ(expected, actual);
}
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list<TestCase>{
{
"",
"",
false
},
{
"test '\"`",
"",
false
},
{
"SELECT * FROM db.`table`",
"",
false
},
{
"道渠",
"`道渠`",
true
},
{
"",
"`道`",
true
},
{
"道道(skip) 道(",
"`道道`(skip) `道`(",
true
},
{
"`道渠`",
"",
false
},
{
"'道'",
"",
false
},
{
"\"\"",
"",
false
},
{
"` 道 test 渠 `",
"",
false
},
{
"skip 道 skip 123",
"skip `道` skip 123",
true
},
{
"skip 123 `道` skip",
"",
false
},
{
"skip `道 skip 123",
"",
false
},
{
"skip test道 skip",
"skip `test道` skip",
true
},
{
"test道2test",
"`test道2test`",
true
},
{
"skip test道2test 123",
"skip `test道2test` 123",
true
},
{
"skip 您a您a您a a您a您a您a 1您2您3您4 skip",
"skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip",
true
},
{
"skip 您a 您a您a b您2您c您4 skip",
"skip `您a` `您a您a` `b您2您c您4` skip",
true
},
{
"123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip",
"`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip",
true
},
{
"_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip",
"`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip",
true
},
{
"TABLE 您2 您(",
"TABLE `您2` `您`(",
true
},
{
"TABLE 您.a您2(日2日2 INT",
"TABLE `您`.`a您2`(`日2日2` INT",
true
},
{
"TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)",
"TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)",
true
},
{
"TABLE 您a日.您a您a您a(test INT",
"TABLE `您a日`.`您a您a您a`(test INT",
true
},
{
"TABLE 您a日.您a您a您a(Hi您Hi好Hi INT",
"TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT",
true
},
{
"--TABLE 您a日.您a您a您a(test INT",
"",
false
},
{
"--您a日.您a您a您a(\n您Hi好",
"--您a日.您a您a您a(\n`您Hi好`",
true
},
{
" /* TABLE 您a日.您a您a您a(test INT",
"",
false
},
{
"/*您a日.您a您a您a(*/\n您Hi好",
"/*您a日.您a您a您a(*/\n`您Hi好`",
true
},
{
" 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a",
" `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`",
true
},
//{ TODO
// "TABLE 您2.您a您a您a(test INT",
// "TABLE `您2`.`您a您a您a`(test INT",
// true
//},
{
"skip 您a您a您a skip",
"skip `您a您a您a` skip",
true
},
{
"test 您a2您3a您a 4 again",
"test `您a2您3a您a` 4 again",
true
},
{
"CREATE TABLE db.`道渠`",
"",
false
},
{
"CREATE TABLE db.`道渠",
"",
false
},
{
"CREATE TABLE db.道渠",
"CREATE TABLE db.`道渠`",
true
},
{
"CREATE TABLE db. 道渠",
"CREATE TABLE db. `道渠`",
true
},
{
R"sql(
CREATE TABLE gb2312.`` ( `id` int NOT NULL,
INT,
DATETIME,
test INT, test您 INT, test您test INT,
test INT, test道渠 INT, test道渠test INT,
_ INT, _您 INT, _您_ INT,
__ INT, __您您 INT, __您您__ INT,
2 INT, 2 INT, 22 INT,
22 INT, 22 INT, 2222 INT,
_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT,
__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT,
2_ INT, 2_您 INT, 2_您2_ INT,
22__ INT, 22__您您 INT, 22__您您22__ INT,
_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT,
_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT,
test3 INT, test3您 INT, test3您test3 INT, test3您3test INT,
test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT,
3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT,
3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT,
_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT,
_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT,
_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT,
test_日期 varchar(256), test_道_2 varchar(256) NOT NULL ,
test_道渠您_3
BIGINT NOT NULL,
3_test INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
R"sql(
CREATE TABLE gb2312.`` ( `id` int NOT NULL,
`` INT,
`` DATETIME,
`test` INT, `test您` INT, `test您test` INT,
`test` INT, `test道渠` INT, `test道渠test` INT,
`_` INT, `_您` INT, `_您_` INT,
`__` INT, `__您您` INT, `__您您__` INT,
`2` INT, `2` INT, `22` INT,
`22` INT, `22` INT, `2222` INT,
`_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT,
`__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT,
`2_` INT, `2_您` INT, `2_您2_` INT,
`22__` INT, `22__您您` INT, `22__您您22__` INT,
`_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT,
`_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT,
`test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT,
`test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT,
`3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT,
`3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT,
`_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT,
`_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT,
`_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT,
`test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL ,
`test_道渠您_3`
BIGINT NOT NULL,
`3_test` INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
true
},
}));

View File

@ -0,0 +1,96 @@
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <Parsers/CommonParsers.h>
#include <Common/quoteString.h>
namespace DB
{
/// Checks if there are no any tokens (like whitespaces) between current and previous pos
static bool noWhitespaces(const char * to, const char * from)
{
return static_cast<size_t>(from - to) == 0;
}
/// Checks if the token should be quoted too together with unrecognized
static bool isWordOrNumber(TokenType type)
{
return type == TokenType::BareWord || type == TokenType::Number;
}
static void quoteLiteral(
IParser::Pos & pos,
IParser::Pos & pos_prev,
const char *& pos_unrecognized,
const char *& copy_from,
String & rewritten_query)
{
/// Copy also whitespaces if any
const auto * end =
isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin)
? pos->end
: pos_prev->end;
String literal(pos_unrecognized, static_cast<size_t>(end - pos_unrecognized));
rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal));
copy_from = end;
}
bool tryQuoteUnrecognizedTokens(const String & query, String & res)
{
Tokens tokens(query.data(), query.data() + query.size());
IParser::Pos pos(tokens, 0);
Expected expected;
String rewritten_query;
const char * copy_from = query.data();
auto pos_prev = pos;
const char * pos_unrecognized = nullptr;
for (;pos->type != TokenType::EndOfStream; ++pos)
{
/// Commit quotes if any whitespaces found or the token is not a word
bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type));
if (pos_unrecognized && commit)
{
quoteLiteral(
pos,
pos_prev,
pos_unrecognized,
copy_from,
rewritten_query);
pos_unrecognized = nullptr;
}
if (pos->type == TokenType::Error)
{
/// Find first appearance of the error token
if (!pos_unrecognized)
{
pos_unrecognized =
isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin)
? pos_prev->begin
: pos->begin;
}
}
pos_prev = pos;
}
/// There was EndOfStream but not committed unrecognized token
if (pos_unrecognized)
{
quoteLiteral(
pos,
pos_prev,
pos_unrecognized,
copy_from,
rewritten_query);
pos_unrecognized = nullptr;
}
/// If no Errors found
if (copy_from == query.data())
return false;
auto size = static_cast<size_t>(pos->end - copy_from);
rewritten_query.append(copy_from, size);
res = rewritten_query;
return true;
}
}

View File

@ -0,0 +1,10 @@
#pragma once
#include <base/types.h>
namespace DB
{
bool tryQuoteUnrecognizedTokens(const String & query, String & res);
}

View File

@ -1,22 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionToDecimalString.h>
#include <Functions/IFunction.h>
namespace DB
{
REGISTER_FUNCTION(ToDecimalString)
{
factory.registerFunction<FunctionToDecimalString>(
FunctionDocumentation{
.description=R"(
Returns string representation of a number. First argument is the number of any numeric type,
second argument is the desired number of digits in fractional part. Returns String.
)",
.examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
.categories{"String"}
}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -1,312 +0,0 @@
#pragma once
#include <Core/Types.h>
#include <Core/DecimalFunctions.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
}
class FunctionToDecimalString : public IFunction
{
public:
static constexpr auto name = "toDecimalString";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToDecimalString>(); }
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isNumber(*arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal first argument for formatDecimal function: got {}, expected numeric type",
arguments[0]->getName());
if (!isUInt8(*arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal second argument for formatDecimal function: got {}, expected UInt8",
arguments[1]->getName());
return std::make_shared<DataTypeString>();
}
bool useDefaultImplementationForConstants() const override { return true; }
private:
/// For operations with Integer/Float
template <typename FromVectorType>
void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
/// Buffer is used here and in functions below because resulting size cannot be precisely anticipated,
/// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case.
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
for (size_t i = 0; i < input_rows_count; ++i)
{
format(vec_from[i], buf_to, precision);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(vec_from[i], buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
format(value_from, buf_to, vec_precision[i]);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
/// For operations with Decimal
template <typename FirstArgVectorType>
void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
/// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77.
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
for (size_t i = 0; i < input_rows_count; ++i)
{
writeText(vec_from[i], from_scale, buf_to, true, true, precision);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgVectorType>
void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_from.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <typename FirstArgType>
void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
{
size_t input_rows_count = vec_precision.size();
result_offsets.resize(input_rows_count);
WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
for (size_t i = 0; i < input_rows_count; ++i)
{
if (vec_precision[i] > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
writeChar(0, buf_to);
result_offsets[i] = buf_to.count();
}
buf_to.finalize();
}
template <is_floating_point T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values,
/// Catch this here to give user a more reasonable error.
if (precision > 60)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));
DB::DoubleConverter<false>::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);
if (!result)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);
out.write(buffer, builder.position());
writeChar(0, out);
}
template <is_integer T>
static void format(T value, DB::WriteBuffer & out, UInt8 precision)
{
/// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals).
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
if (precision > max_digits)
throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
"Too many fractional digits requested, shall not be more than {}", max_digits);
writeText(value, out);
if (precision > 0) [[likely]]
{
writeChar('.', out);
for (int i = 0; i < precision; ++i)
writeChar('0', out);
writeChar(0, out);
}
}
public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
switch (arguments[0].type->getTypeId())
{
case TypeIndex::UInt8: return executeType<UInt8>(arguments);
case TypeIndex::UInt16: return executeType<UInt16>(arguments);
case TypeIndex::UInt32: return executeType<UInt32>(arguments);
case TypeIndex::UInt64: return executeType<UInt64>(arguments);
case TypeIndex::UInt128: return executeType<UInt128>(arguments);
case TypeIndex::UInt256: return executeType<UInt256>(arguments);
case TypeIndex::Int8: return executeType<Int8>(arguments);
case TypeIndex::Int16: return executeType<Int16>(arguments);
case TypeIndex::Int32: return executeType<Int32>(arguments);
case TypeIndex::Int64: return executeType<Int64>(arguments);
case TypeIndex::Int128: return executeType<Int128>(arguments);
case TypeIndex::Int256: return executeType<Int256>(arguments);
case TypeIndex::Float32: return executeType<Float32>(arguments);
case TypeIndex::Float64: return executeType<Float64>(arguments);
case TypeIndex::Decimal32: return executeType<Decimal32>(arguments);
case TypeIndex::Decimal64: return executeType<Decimal64>(arguments);
case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
default:
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
}
}
private:
template <typename T>
ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
{
const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());
auto result_col = ColumnString::create();
auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
ColumnString::Chars & result_chars = result_col_string->getChars();
ColumnString::Offsets & result_offsets = result_col_string->getOffsets();
if constexpr (is_decimal<T>)
{
const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
UInt8 from_scale = from_col->getScale();
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_scale);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
else
{
const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
if (from_col)
{
if (precision_col_const)
vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
else
vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
}
else if (from_col_const)
constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
}
return result_col;
}
};
}

View File

@ -1,9 +1,12 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
#include <memory>
#include <unordered_map>
namespace DB
{
@ -46,7 +49,7 @@ namespace
return res;
}
/// Сount how many times each bigram occurs in the text.
/// Count how many times each bigram occurs in the text.
template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats(
const UInt8 * data,
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
}
}
#endif

View File

@ -5,19 +5,17 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/isValidUTF8.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
#include <Interpreters/Context.h>
#include <compact_lang_det.h>
namespace DB
{
/* Determine language of Unicode UTF-8 text.

View File

@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
}
}
#endif

View File

@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>
#if USE_NLP
#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
}
}
#endif

View File

@ -4,14 +4,18 @@
#include <mutex>
#include <Core/Types.h>
#include <Common/ProfileEvents.h>
#include <IO/OpenedFile.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/ProfileEvents.h>
#include <city.h>
namespace ProfileEvents
{
extern const Event OpenedFileCacheHits;
extern const Event OpenedFileCacheMisses;
extern const Event OpenedFileCacheMicroseconds;
}
namespace DB
@ -26,57 +30,79 @@ namespace DB
*/
class OpenedFileCache
{
private:
using Key = std::pair<std::string /* path */, int /* flags */>;
class OpenedFileMap
{
using Key = std::pair<std::string /* path */, int /* flags */>;
using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
using Files = std::map<Key, OpenedFileWeakPtr>;
using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
using Files = std::map<Key, OpenedFileWeakPtr>;
Files files;
std::mutex mutex;
Files files;
std::mutex mutex;
public:
using OpenedFilePtr = std::shared_ptr<OpenedFile>;
OpenedFilePtr get(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
if (!inserted)
{
if (auto res = it->second.lock())
{
ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
return res;
}
}
ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
OpenedFilePtr res
{
new OpenedFile(path, flags),
[key, this](auto ptr)
{
{
std::lock_guard another_lock(mutex);
files.erase(key);
}
delete ptr;
}
};
it->second = res;
return res;
}
void remove(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
files.erase(key);
}
};
static constexpr size_t buckets = 1024;
std::vector<OpenedFileMap> impls{buckets};
public:
using OpenedFilePtr = std::shared_ptr<OpenedFile>;
using OpenedFilePtr = OpenedFileMap::OpenedFilePtr;
OpenedFilePtr get(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
if (!inserted)
{
if (auto res = it->second.lock())
{
ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
return res;
}
}
ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);
OpenedFilePtr res
{
new OpenedFile(path, flags),
[key, this](auto ptr)
{
{
std::lock_guard another_lock(mutex);
files.erase(key);
}
delete ptr;
}
};
it->second = res;
return res;
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
return impls[bucket].get(path, flags);
}
void remove(const std::string & path, int flags)
{
Key key(path, flags);
std::lock_guard lock(mutex);
files.erase(key);
ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
impls[bucket].remove(path, flags);
}
static OpenedFileCache & instance()
@ -87,5 +113,4 @@ public:
};
using OpenedFileCachePtr = std::shared_ptr<OpenedFileCache>;
}

View File

@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }
template <typename T>
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length, UInt32 fractional_length)
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
/// If it's big integer, but the number of digits is small,
/// use the implementation for smaller integers for more efficient arithmetic.
if constexpr (std::is_same_v<T, Int256>)
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt128>::max())
{
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
return;
}
}
@ -932,36 +932,24 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
{
if (x <= std::numeric_limits<UInt32>::max())
{
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
return;
}
else if (x <= std::numeric_limits<UInt64>::max())
{
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
return;
}
}
constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
assert(scale <= max_digits);
assert(fractional_length <= max_digits);
char buf[max_digits];
memset(buf, '0', std::max(scale, fractional_length));
memset(buf, '0', scale);
T value = x;
Int32 last_nonzero_pos = 0;
if (fixed_fractional_length && fractional_length < scale)
{
T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
auto round_carry = new_value % 10;
value = new_value / 10;
if (round_carry >= 5)
value += 1;
}
for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
for (Int32 pos = scale - 1; pos >= 0; --pos)
{
auto remainder = value % 10;
value /= 10;
@ -973,12 +961,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
}
writeChar('.', ostr);
ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
}
template <typename T>
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
bool fixed_fractional_length = false, UInt32 fractional_length = 0)
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
T part = DecimalUtils::getWholePart(x, scale);
@ -989,7 +976,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
writeIntText(part, ostr);
if (scale || (fixed_fractional_length && fractional_length > 0))
if (scale)
{
part = DecimalUtils::getFractionalPart(x, scale);
if (part || trailing_zeros)
@ -997,7 +984,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
if (part < 0)
part *= T(-1);
writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
writeDecimalFractional(part, scale, ostr, trailing_zeros);
}
}
}

View File

@ -144,12 +144,6 @@ public:
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
Address(
const String & host_port_,
const ClusterConnectionParameters & params,
UInt32 shard_index_,
UInt32 replica_index_);
Address(
const DatabaseReplicaInfo & info,
const ClusterConnectionParameters & params,

View File

@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
{
remote_shards.emplace_back(Shard{
.query = query_ast,
.main_table = main_table,
.header = header,
.shard_info = shard_info,
.lazy = lazy,

View File

@ -50,6 +50,8 @@ public:
{
/// Query and header may be changed depending on shard.
ASTPtr query;
/// Used to check the table existence on remote node
StorageID main_table;
Block header;
Cluster::ShardInfo shard_info;

View File

@ -35,7 +35,12 @@ namespace ErrorCodes
namespace ClusterProxy
{
ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
ContextPtr context,
const Settings & settings,
const StorageID & main_table,
const SelectQueryInfo * query_info,
Poco::Logger * log)
{
Settings new_settings = settings;
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
/// If "secret" (in remote_servers) is not in use,
/// user on the shard is not the same as the user on the initiator,
/// hence per-user limits should not be applied.
if (cluster.getSecret().empty())
if (!interserver_mode)
{
/// Does not matter on remote servers, because queries are sent under different user.
new_settings.max_concurrent_queries_for_user = 0;
@ -170,17 +175,15 @@ void executeQuery(
std::vector<QueryPlanPtr> plans;
SelectStreamFactory::Shards remote_shards;
auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
new_context->increaseDistributedDepth();
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
ASTPtr query_ast_for_shard;
if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
ASTPtr query_ast_for_shard = query_ast->clone();
if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
query_ast_for_shard = query_ast->clone();
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_expr->getSampleBlock().getByPosition(0).type,
@ -191,8 +194,6 @@ void executeQuery(
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
else
query_ast_for_shard = query_ast->clone();
if (shard_filter_generator)
{

View File

@ -34,8 +34,12 @@ class SelectStreamFactory;
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
ContextMutablePtr updateSettingsForCluster(
const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
ContextPtr context,
const Settings & settings,
const StorageID & main_table,
const SelectQueryInfo * query_info = nullptr,
Poco::Logger * log = nullptr);
using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.

View File

@ -2274,8 +2274,7 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
&& !settings.allow_experimental_query_deduplication
&& !settings.empty_result_for_aggregation_by_empty_set
&& storage
&& storage->getName() != "MaterializedMySQL"
&& !storage->hasLightweightDeletedMask()
&& storage->supportsTrivialCountOptimization()
&& query_info.filter_asts.empty()
&& query_analyzer->hasAggregation()
&& (query_analyzer->aggregates().size() == 1)

View File

@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
}
else if (const auto * explain_query = ast->as<ASTExplainQuery>())
{
if (explain_query->settings_ast)
InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();
applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
}
else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))

View File

@ -1900,6 +1900,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}
bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral)
return false;
String s;
ReadBufferFromMemory in(pos->begin, pos->size());
try
{
if (pos->type == TokenType::StringLiteral)
readQuotedStringWithSQLStyle(s, in);
else
readDoubleQuotedStringWithSQLStyle(s, in);
}
catch (const Exception &)
{
expected.add(pos, "string literal or double quoted string");
return false;
}
if (in.count() != pos->size())
{
expected.add(pos, "string literal or double quoted string");
return false;
}
auto literal = std::make_shared<ASTLiteral>(s);
literal->begin = pos;
literal->end = ++pos;
node = literal;
return true;
}
bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
if (pos->type != TokenType::DoubleAt)

View File

@ -367,6 +367,21 @@ protected:
};
/** MySQL comment:
* CREATE TABLE t (
* i INT PRIMARY KEY,
* first_name VARCHAR(255) COMMENT 'FIRST_NAME',
* last_name VARCHAR(255) COMMENT "LAST_NAME"
* )
*/
class ParserMySQLComment : public IParserBase
{
protected:
const char * getName() const override { return "MySQL comment parser"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/** MySQL-style global variable: @@var
*/
class ParserMySQLGlobalVariable : public IParserBase

View File

@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node,
OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
OptionDescribe("UNIQUE", "unique_key", std::make_unique<ParserAlwaysTrue>()),
OptionDescribe("KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
OptionDescribe("COMMENT", "comment", std::make_unique<ParserStringLiteral>()),
OptionDescribe("COMMENT", "comment", std::make_unique<ParserMySQLComment>()),
OptionDescribe("CHARACTER SET", "charset_name", std::make_unique<ParserCharsetOrCollateName>()),
OptionDescribe("CHARSET", "charset", std::make_unique<ParserCharsetOrCollateName>()),
OptionDescribe("COLLATE", "collate", std::make_unique<ParserCharsetOrCollateName>()),

View File

@ -3,6 +3,7 @@
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserDescribeTableQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Common/typeid_cast.h>
@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
ParserKeyword s_describe("DESCRIBE");
ParserKeyword s_desc("DESC");
ParserKeyword s_table("TABLE");
ParserKeyword s_settings("SETTINGS");
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserSetQuery parser_settings(true);
ASTPtr database;
ASTPtr table;
@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
s_table.ignore(pos, expected);
ASTPtr table_expression;
if (!ParserTableExpression().parse(pos, table_expression, expected))
if (!ParserTableExpression().parse(pos, query->table_expression, expected))
return false;
query->children.push_back(std::move(table_expression));
query->table_expression = query->children.back();
/// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT
ASTPtr settings;
if (s_settings.ignore(pos, expected))
{
if (!parser_settings.parse(pos, query->settings_ast, expected))
return false;
}
query->children.push_back(query->table_expression);
if (query->settings_ast)
query->children.push_back(query->settings_ast);
node = query;

View File

@ -156,7 +156,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
// SETTINGS key1 = value1, key2 = value2, ...
ParserKeyword s_settings("SETTINGS");
if (s_settings.ignore(pos, expected))
if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
{
ParserSetQuery parser_settings(true);
if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))

View File

@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
ParserKeyword s_exists("EXISTS");
ParserKeyword s_temporary("TEMPORARY");
ParserKeyword s_describe("DESCRIBE");
ParserKeyword s_desc("DESC");
ParserKeyword s_show("SHOW");
ParserKeyword s_create("CREATE");
ParserKeyword s_database("DATABASE");

View File

@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible(
return false;
const auto & storage = table_node.getStorage();
if (!storage->supportsTrivialCountOptimization())
return false;
auto storage_id = storage->getStorageID();
auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(),
storage_id.getTableName(),

View File

@ -162,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
if (my_table_func_ptr)
try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, &current_settings, PoolMode::GET_MANY);
else
try_results = my_shard.shard_info.pool->getManyChecked(timeouts, &current_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName());
try_results = my_shard.shard_info.pool->getManyChecked(
timeouts, &current_settings, PoolMode::GET_MANY,
my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName());
}
catch (const Exception & ex)
{
@ -241,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
remote_query_executor->setPoolMode(PoolMode::GET_MANY);
if (!table_func_ptr)
remote_query_executor->setMainTable(main_table);
remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table);
pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
addConvertingActions(pipes.back(), output_stream->header);

View File

@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr<Throttler>;
class ReadFromRemote final : public ISourceStep
{
public:
/// @param main_table_ if Shards contains main_table then this parameter will be ignored
ReadFromRemote(
ClusterProxy::SelectStreamFactory::Shards shards_,
Block header_,

View File

@ -6,10 +6,18 @@
#include <Poco/Util/LayeredConfiguration.h>
#include <IO/HTTPCommon.h>
#include <Common/getResource.h>
#include <re2/re2.h>
#include <incbin.h>
#include "config.h"
/// Embedded HTML pages
INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html");
INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html");
INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js");
namespace DB
{
@ -34,13 +42,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
if (request.getURI().starts_with("/play"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("play.html");
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
}
else if (request.getURI().starts_with("/dashboard"))
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
std::string html(getResource("dashboard.html"));
std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);
/// Replace a link to external JavaScript file to embedded file.
/// This allows to open the HTML without running a server and to host it on server.
@ -55,7 +63,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
else if (request.getURI() == "/js/uplot.js")
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
*response.send() << getResource("js/uplot.js");
*response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
}
else
{

View File

@ -254,6 +254,9 @@ public:
/// because those are internally translated into 'ALTER UDPATE' mutations.
virtual bool supportsDelete() const { return false; }
/// Return true if the trivial count query could be optimized without reading the data at all.
virtual bool supportsTrivialCountOptimization() const { return false; }
private:
StorageID storage_id;

View File

@ -434,6 +434,8 @@ public:
bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; }
bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); }
NamesAndTypesList getVirtuals() const override;
bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override;

View File

@ -60,7 +60,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/InterpreterInsertQuery.h>
@ -75,6 +74,7 @@
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
@ -434,7 +434,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
{
/// Always calculate optimized cluster here, to avoid conditions during read()
/// (Anyway it will be calculated in the read())
ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info);
if (optimized_cluster)
{
LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
@ -1297,7 +1297,7 @@ ClusterPtr StorageDistributed::getCluster() const
}
ClusterPtr StorageDistributed::getOptimizedCluster(
ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
{
ClusterPtr cluster = getCluster();
const Settings & settings = local_context->getSettingsRef();
@ -1306,7 +1306,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(
if (has_sharding_key && sharding_key_is_usable)
{
ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context);
ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context);
if (optimized)
return optimized;
}
@ -1355,25 +1355,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c
/// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
ClusterPtr StorageDistributed::skipUnusedShards(
ClusterPtr cluster,
const ASTPtr & query_ptr,
const SelectQueryInfo & query_info,
const StorageSnapshotPtr & storage_snapshot,
ContextPtr local_context) const
{
const auto & select = query_ptr->as<ASTSelectQuery &>();
const auto & select = query_info.query->as<ASTSelectQuery &>();
if (!select.prewhere() && !select.where())
{
return nullptr;
}
/// FIXME: support analyzer
if (!query_info.syntax_analyzer_result)
return nullptr;
ASTPtr condition_ast;
if (select.prewhere() && select.where())
/// Remove JOIN from the query since it may contain a condition for other tables.
/// But only the conditions for the left table should be analyzed for shard skipping.
{
condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
}
else
{
condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
ASTPtr select_without_join_ptr = select.clone();
ASTSelectQuery select_without_join = select_without_join_ptr->as<ASTSelectQuery &>();
TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result;
removeJoin(select_without_join, analyzer_result_without_join, local_context);
if (!select_without_join.prewhere() && !select_without_join.where())
return nullptr;
if (select_without_join.prewhere() && select_without_join.where())
condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone());
else
condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone();
}
replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
@ -1396,11 +1405,9 @@ ClusterPtr StorageDistributed::skipUnusedShards(
return nullptr;
}
// Can't get definite answer if we can skip any shards
// Can't get a definite answer if we can skip any shards
if (!blocks)
{
return nullptr;
}
std::set<int> shards;

View File

@ -182,10 +182,10 @@ private:
/// Apply the following settings:
/// - optimize_skip_unused_shards
/// - force_optimize_skip_unused_shards
ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const;
ClusterPtr skipUnusedShards(
ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
/// This method returns optimal query processing stage.
///

View File

@ -41,6 +41,8 @@ public:
void drop() override { nested_storage->drop(); }
bool supportsTrivialCountOptimization() const override { return false; }
private:
[[noreturn]] static void throwNotAllowed()
{

View File

@ -19,6 +19,7 @@
#include <Processors/Sinks/SinkToStorage.h>
#include <QueryPipeline/Pipe.h>
#include <Common/parseRemoteDescription.h>
#include <Common/quoteString.h>
#include <Common/logger_useful.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Databases/MySQL/FetchTablesColumnsList.h>
@ -34,16 +35,6 @@ namespace ErrorCodes
extern const int UNKNOWN_TABLE;
}
static String backQuoteMySQL(const String & x)
{
String res(x.size(), '\0');
{
WriteBufferFromString wb(res);
writeBackQuotedStringMySQL(x, wb);
}
return res;
}
StorageMySQL::StorageMySQL(
const StorageID & table_id_,
mysqlxx::PoolWithFailover && pool_,

View File

@ -4902,67 +4902,102 @@ void StorageReplicatedMergeTree::read(
snapshot_data.alter_conversions = {};
});
/** The `select_sequential_consistency` setting has two meanings:
* 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
* 2. Do not read parts that have not yet been written to the quorum of the replicas.
* For this you have to synchronously go to ZooKeeper.
*/
if (local_context->getSettingsRef().select_sequential_consistency)
{
auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
if (auto plan = reader.read(
column_names, storage_snapshot, query_info, local_context,
max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false))
query_plan = std::move(*plan);
return;
}
const auto & settings = local_context->getSettingsRef();
/// The `select_sequential_consistency` setting has two meanings:
/// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
/// 2. Do not read parts that have not yet been written to the quorum of the replicas.
/// For this you have to synchronously go to ZooKeeper.
if (settings.select_sequential_consistency)
return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
if (local_context->canUseParallelReplicasOnInitiator())
return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
}
void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams)
{
auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
auto plan = reader.read(column_names, storage_snapshot, query_info, local_context,
max_block_size, num_streams, processed_stage, std::move(max_added_blocks),
/* enable_parallel_reading= */false);
if (plan)
query_plan = std::move(*plan);
}
void StorageReplicatedMergeTree::readParallelReplicasImpl(
QueryPlan & query_plan,
const Names & /*column_names*/,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
const size_t /*max_block_size*/,
const size_t /*num_streams*/)
{
auto table_id = getStorageID();
auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);
ASTPtr modified_query_ast;
Block header;
if (local_context->getSettingsRef().allow_experimental_analyzer)
{
auto table_id = getStorageID();
auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
ASTPtr modified_query_ast;
Block header;
if (local_context->getSettingsRef().allow_experimental_analyzer)
{
auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);
header = InterpreterSelectQueryAnalyzer::getSampleBlock(
modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
}
else
{
modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
header
= InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);
ClusterProxy::SelectStreamFactory select_stream_factory =
ClusterProxy::SelectStreamFactory(
header,
{},
storage_snapshot,
processed_stage);
ClusterProxy::executeQueryWithParallelReplicas(
query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr,
select_stream_factory, modified_query_ast,
local_context, query_info, cluster);
header = InterpreterSelectQueryAnalyzer::getSampleBlock(
modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
}
else
{
if (auto plan = reader.read(
column_names, storage_snapshot, query_info,
local_context, max_block_size, num_streams,
processed_stage, nullptr, /*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower()))
query_plan = std::move(*plan);
modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr);
header
= InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
}
ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory(
header,
{},
storage_snapshot,
processed_stage);
ClusterProxy::executeQueryWithParallelReplicas(
query_plan, getStorageID(),
/* table_func_ptr= */ nullptr,
select_stream_factory, modified_query_ast,
local_context, query_info, parallel_replicas_cluster);
}
void StorageReplicatedMergeTree::readLocalImpl(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
const size_t max_block_size,
const size_t num_streams)
{
auto plan = reader.read(
column_names, storage_snapshot, query_info,
local_context, max_block_size, num_streams,
processed_stage,
/* max_block_numbers_to_read= */ nullptr,
/* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower());
if (plan)
query_plan = std::move(*plan);
}
template <class Func>

View File

@ -130,7 +130,7 @@ public:
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams) override;
@ -513,6 +513,36 @@ private:
static std::optional<QueryPipeline> distributedWriteFromClusterStorage(const std::shared_ptr<IStorageCluster> & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context);
void readLocalImpl(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams);
void readLocalSequentialConsistencyImpl(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams);
void readParallelReplicasImpl(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr local_context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams);
template <class Func>
void foreachActiveParts(Func && func, bool select_sequential_consistency) const;

View File

@ -30,7 +30,6 @@ endif()
add_dependencies(generate-source generate-contributors)
set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")
add_custom_command(
OUTPUT StorageSystemLicenses.generated.cpp
@ -38,23 +37,13 @@ add_custom_command(
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})
# Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
TARGET information_schema_metadata
RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
RESOURCES schemata.sql tables.sql views.sql columns.sql
)
list (SORT storages_system_sources) # Reproducible build
add_library(clickhouse_storages_system ${storages_system_sources})
add_dependencies(clickhouse_storages_system information_schema_metadata)
target_link_libraries(clickhouse_storages_system PRIVATE
dbms
common
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
clickhouse_common_zookeeper
clickhouse_parsers
Poco::JSON
INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
)
target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)

View File

@ -3,14 +3,23 @@
#include <Storages/System/attachSystemTablesImpl.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Common/getResource.h>
#include <incbin.h>
#include "config.h"
/// Embedded SQL definitions
INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");
INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");
namespace DB
{
/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
{
try
{
@ -21,12 +30,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;
String metadata_resource_name = view_name + ".sql";
auto attach_query = getResource(metadata_resource_name);
if (attach_query.empty())
if (query.empty())
return;
ParserCreateQuery parser;
ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
"Attach query from embedded resource " + metadata_resource_name,
DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
@ -50,10 +58,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
{
createInformationSchemaView(context, information_schema_database, "schemata");
createInformationSchemaView(context, information_schema_database, "tables");
createInformationSchemaView(context, information_schema_database, "views");
createInformationSchemaView(context, information_schema_database, "columns");
createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
}
}

View File

@ -2,7 +2,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
@ -58,7 +57,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
}
ColumnsDescription res;
auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);
auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);
/// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
/// since this is a service query and should not lead to query failure.
@ -177,7 +176,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
const auto & shards_info = cluster.getShardsInfo();
auto query = "DESC TABLE " + remote_table_id.getFullTableName();
auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id);
auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
new_context->setSetting("describe_extend_object_types", true);
/// Expect only needed columns from the result of DESC TABLE.

View File

@ -264,7 +264,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
secure,
/* priority= */ Priority{1},
/* cluster_name= */ "",
/* password= */ ""
/* cluster_secret= */ ""
};
cluster = std::make_shared<Cluster>(context->getSettingsRef(), names, params);
}

View File

@ -162,3 +162,5 @@ endif ()
if (TARGET ch_contrib::fiu)
set(FIU_ENABLE 1)
endif()
set(SOURCE_DIR ${CMAKE_SOURCE_DIR})

View File

@ -130,4 +130,6 @@
02581_share_big_sets_between_mutation_tasks_long
02581_share_big_sets_between_multiple_mutations_tasks_long
00992_system_parts_race_condition_zookeeper_long
02790_optimize_skip_unused_shards_join
01940_custom_tld_sharding_key
02815_range_dict_no_direct_join

View File

@ -529,6 +529,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val):
return gen
# To keep dependency list as short as possible, tzdata is not used here (to
# avoid try/except block for import)
def get_localzone():
return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:]))
class SettingsRandomizer:
settings = {
"max_insert_threads": lambda: 0
@ -602,20 +608,33 @@ class SettingsRandomizer:
"enable_memory_bound_merging_of_aggregation_results": lambda: random.randint(
0, 1
),
"session_timezone": lambda: random.choice(
[
# special non-deterministic around 1970 timezone, see [1].
#
# [1]: https://github.com/ClickHouse/ClickHouse/issues/42653
"America/Mazatlan",
"America/Hermosillo",
"Mexico/BajaSur",
# server default that is randomized across all timezones
# NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same.
get_localzone(),
]
),
}
@staticmethod
def get_random_settings(args):
random_settings = []
random_settings = {}
is_debug = BuildFlags.DEBUG in args.build_flags
for setting, generator in SettingsRandomizer.settings.items():
if (
is_debug
and setting == "allow_prefetched_read_pool_for_remote_filesystem"
):
random_settings.append(f"{setting}=0")
random_settings[setting] = 0
else:
random_settings.append(f"{setting}={generator()}")
random_settings[setting] = generator()
return random_settings
@ -651,10 +670,10 @@ class MergeTreeSettingsRandomizer:
@staticmethod
def get_random_settings(args):
random_settings = []
random_settings = {}
for setting, generator in MergeTreeSettingsRandomizer.settings.items():
if setting not in args.changed_merge_tree_settings:
random_settings.append(f"{setting}={generator()}")
random_settings[setting] = generator()
return random_settings
@ -766,7 +785,14 @@ class TestCase:
@staticmethod
def cli_format_settings(settings_list) -> str:
return " ".join([f"--{setting}" for setting in settings_list])
out = []
for k, v in settings_list.items():
out.extend([f"--{k}", str(v)])
return " ".join(out)
@staticmethod
def http_format_settings(settings_list) -> str:
return urllib.parse.urlencode(settings_list)
def has_show_create_table_in_test(self):
return not subprocess.call(["grep", "-iq", "show create", self.case_file])
@ -774,11 +800,12 @@ class TestCase:
def add_random_settings(self, client_options):
new_options = ""
if self.randomize_settings:
http_params = self.http_format_settings(self.random_settings)
if len(self.base_url_params) == 0:
os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
os.environ["CLICKHOUSE_URL_PARAMS"] = http_params
else:
os.environ["CLICKHOUSE_URL_PARAMS"] = (
self.base_url_params + "&" + "&".join(self.random_settings)
self.base_url_params + "&" + http_params
)
new_options += f" {self.cli_format_settings(self.random_settings)}"

View File

@ -3199,6 +3199,7 @@ class ClickHouseInstance:
):
self.name = name
self.base_cmd = cluster.base_cmd
self.base_dir = base_path
self.docker_id = cluster.get_instance_docker_id(self.name)
self.cluster = cluster
self.hostname = hostname if hostname is not None else self.name
@ -4193,6 +4194,14 @@ class ClickHouseInstance:
["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
)
def put_users_config(self, config_path):
"""Put new config (useful if you cannot put it at the start)"""
instance_config_dir = p.abspath(p.join(self.path, "configs"))
users_d_dir = p.abspath(p.join(instance_config_dir, "users.d"))
config_path = p.join(self.base_dir, config_path)
shutil.copy(config_path, users_d_dir)
def create_dir(self):
"""Create the instance directory and all the needed files there."""

View File

@ -1581,6 +1581,128 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE utf8mb4_test")
def utf8mb4_column_test(clickhouse_node, mysql_node, service_name):
db = "utf8mb4_column_test"
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
mysql_node.query(f"CREATE DATABASE {db}")
# Full sync
mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)")
mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)")
mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())")
mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())")
clickhouse_node.query(
f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
)
# Full sync replicated unquoted columns names since they use SHOW CREATE TABLE
# which returns quoted column names
check_query(
clickhouse_node,
f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}",
"quoted\nunquoted\n",
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted",
"1\n",
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted",
"1\n",
)
# Inc sync
mysql_node.query(
f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)"
)
mysql_node.query(
f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)"
)
mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())")
mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())")
mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())")
mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())")
check_query(
clickhouse_node,
f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted",
"2\n",
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new",
"1\n",
)
check_query(
clickhouse_node,
f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted",
"2\n",
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new",
"1\n",
)
clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
def utf8mb4_name_test(clickhouse_node, mysql_node, service_name):
db = "您Hi您"
table = "日期"
mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
mysql_node.query(f"CREATE DATABASE `{db}`")
mysql_node.query(
f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
)
mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())")
mysql_node.query(
f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
)
mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())")
clickhouse_node.query(
f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`",
"1\n",
)
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`",
"1\n",
)
# Inc sync
mysql_node.query(
f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
)
mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())")
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`",
"1\n",
)
mysql_node.query(
f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
)
mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())")
check_query(
clickhouse_node,
f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`",
"1\n",
)
clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
def system_parts_test(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS system_parts_test")
clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test")
@ -1701,6 +1823,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_
mysql_node.query("DROP DATABASE materialized_with_column_comments_test")
def double_quoted_comment(clickhouse_node, mysql_node, service_name):
db = "comment_db"
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
mysql_node.query(f"CREATE DATABASE {db}")
mysql_node.query(
f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
)
mysql_node.query(
f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
)
clickhouse_node.query(
f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
)
check_query(
clickhouse_node,
f"SHOW TABLES FROM {db} FORMAT TSV",
"t1\nt2\n",
)
# incremental
mysql_node.query(
f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
)
mysql_node.query(
f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
)
check_query(
clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n"
)
clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")
clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")

View File

@ -381,6 +381,12 @@ def test_utf8mb4(
):
materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57")
materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80")
materialized_with_ddl.utf8mb4_column_test(
clickhouse_node, started_mysql_8_0, "mysql80"
)
materialized_with_ddl.utf8mb4_name_test(
clickhouse_node, started_mysql_8_0, "mysql80"
)
def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node):
@ -422,6 +428,12 @@ def test_materialized_with_column_comments(
)
def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node):
materialized_with_ddl.double_quoted_comment(
clickhouse_node, started_mysql_8_0, "mysql80"
)
def test_materialized_with_enum(
started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node
):

View File

@ -114,7 +114,10 @@ def node_update_config(mode, setting, value=None):
def assert_took(took, should_took):
assert took >= should_took[0] * 0.9 and took < should_took[1]
# we need to decrease the lower limit because the server limits could
# be enforced by throttling some server background IO instead of query IO
# and we have no control over it
assert took >= should_took[0] * 0.85 and took < should_took[1]
@pytest.mark.parametrize(

View File

@ -0,0 +1,7 @@
<clickhouse>
<profiles>
<default>
<force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop>
</default>
</profiles>
</clickhouse>

View File

@ -51,6 +51,12 @@ def start_cluster():
cluster.shutdown()
def restart_node(node):
# set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old)
node.put_users_config("configs/force_remove_data_recursively_on_drop.xml")
node.restart_with_latest_version(signal=9, fix_metadata=True)
def test_mutate_and_upgrade(start_cluster):
for node in [node1, node2]:
node.query("DROP TABLE IF EXISTS mt")
@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):
node2.query("DETACH TABLE mt") # stop being leader
node1.query("DETACH TABLE mt") # stop being leader
node1.restart_with_latest_version(signal=9, fix_metadata=True)
node2.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node1)
restart_node(node2)
# After hard restart table can be in readonly mode
exec_query_with_retry(
@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
# (We could be in process of creating some system table, which will leave empty directory on restart,
# so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files)
node3.query("SYSTEM FLUSH LOGS")
node3.restart_with_latest_version(signal=9, fix_metadata=True)
restart_node(node3)
# checks for readonly
exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)

View File

@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"
# NOTE: session_timezone overrides use_client_time_zone, disable it randomization
env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"

View File

@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
function perform()
{
local query=$1
TZ=UTC $CLICKHOUSE_CLIENT \
--allow_deprecated_syntax_for_merge_tree=1 \
--use_client_time_zone=1 \
--input_format_values_interpret_expressions=0 \
--query "$query" 2>/dev/null
local settings=(
--allow_deprecated_syntax_for_merge_tree 1
--session_timezone UTC
--input_format_values_interpret_expressions 0
)
TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null
if [ "$?" -ne 0 ]; then
echo "query failed"
fi

View File

@ -1,3 +1,15 @@
-- disable timezone randomization since otherwise TTL may fail at particular datetime, i.e.:
--
-- SELECT
-- now(),
-- toDate(toTimeZone(now(), 'America/Mazatlan')),
-- today()
--
-- ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐
-- │ 2023-07-24 06:24:06 │ 2023-07-23 │ 2023-07-24 │
-- └─────────────────────┴───────────────────────────────────────────────┴────────────┘
set session_timezone = '';
drop table if exists ttl_00933_1;
-- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts

View File

@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
set -e -o pipefail
# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
# NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used
$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF'
DROP DATABASE IF EXISTS dictdb_01042;
CREATE DATABASE dictdb_01042;
CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple();

View File

@ -2,6 +2,9 @@
set mutations_sync = 2;
-- system.parts has server default, timezone cannot be randomized
set session_timezone = '';
drop table if exists ttl;
create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d)

View File

@ -238,10 +238,6 @@ defaultValueOfArgumentType
defaultValueOfTypeName
degrees
demangle
detectCharset
detectLanguageUnknown
detectProgrammingLanguage
detectTonality
divide
dotProduct
dumpColumnStructure

View File

@ -15,5 +15,7 @@ AND name NOT IN (
'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
'lemmatize', 'tokenize', 'stem', 'synonyms',
'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
-- these functions are not enabled in fast test
) ORDER BY name;

View File

@ -5,7 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "
# NOTE: dictionaries will be updated according to server TZ, not session, so prohibit it's randomization
$CLICKHOUSE_CLIENT --session_timezone '' -q "
CREATE TABLE table_for_update_field_dictionary
(
key UInt64,

View File

@ -1,21 +0,0 @@
2.00000000000000000000000000000000000000000000000000000000000000000000000000000
2.12
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
2.987600000000000033395508580724708735942840576171875000000000
2.15
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
64.1230010986
64.2340000000
-64.1230010986
-64.2340000000
-32.345
32.34500000000000000000000000000000000000000000000000000000000000000000000000000
32.46
-64.5671232345
128.78932312332132985464
-128.78932312332132985464
128.78932312332132985464000000000000000000000000000000000000000000000000000000000
128.7893231233
-128.78932312332132985464123123789323123321329854600000000000000000000000000000000

View File

@ -1,35 +0,0 @@
-- Regular types
SELECT toDecimalString(2, 77); -- more digits required than exist
SELECT toDecimalString(2.123456, 2); -- rounding
SELECT toDecimalString(-2, 77); -- more digits required than exist
SELECT toDecimalString(-2.123456, 2); -- rounding
SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
SELECT toDecimalString(2.1456, 2); -- rounding
SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
SELECT toDecimalString(-2.1456, 2); -- rounding
-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
SELECT toDecimalString(64.123::Float32, 10);
SELECT toDecimalString(64.234::Float64, 10);
SELECT toDecimalString(-64.123::Float32, 10);
SELECT toDecimalString(-64.234::Float64, 10);
-- Decimals
SELECT toDecimalString(-32.345::Decimal32(3), 3);
SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist
-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
-- These values shall work OK.
SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}

View File

@ -0,0 +1,10 @@
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""

View File

@ -0,0 +1,3 @@
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 FORMAT CSV;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 SETTINGS max_threads=0; -- { clientError SYNTAX_ERROR }

View File

@ -0,0 +1,55 @@
-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995
DROP TABLE IF EXISTS outer;
DROP TABLE IF EXISTS inner;
DROP TABLE IF EXISTS outer_distributed;
DROP TABLE IF EXISTS inner_distributed;
CREATE TABLE IF NOT EXISTS outer
(
`id` UInt64,
`organization_id` UInt64,
`version` UInt64
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY organization_id % 8
ORDER BY (organization_id, id);
CREATE TABLE inner
(
`id` UInt64,
`outer_id` UInt64,
`organization_id` UInt64,
`version` UInt64,
`date` Date
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY toYYYYMM(date)
ORDER BY (organization_id, outer_id);
CREATE TABLE inner_distributed AS inner
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id));
CREATE TABLE outer_distributed AS outer
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id));
SELECT
sum(if(inner_distributed.id != 0, 1, 0)) AS total,
inner_distributed.date AS date
FROM outer_distributed AS outer_distributed
FINAL
LEFT JOIN
(
SELECT
inner_distributed.outer_id AS outer_id,
inner_distributed.id AS id,
inner_distributed.date AS date
FROM inner_distributed AS inner_distributed
FINAL
WHERE inner_distributed.organization_id = 15078
) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id
WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01'))
GROUP BY date
ORDER BY date DESC
SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1;

View File

@ -0,0 +1,11 @@
explain
(Expression)
ExpressionTransform
(Aggregating)
FinalizeAggregatedTransform
AggregatingInOrderTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeInOrder 0 → 1

View File

@ -0,0 +1,18 @@
SET read_in_order_two_level_merge_threshold=1000000;
DROP TABLE IF EXISTS t;
CREATE TABLE t(a UInt64)
ENGINE = MergeTree
ORDER BY a;
INSERT INTO t SELECT * FROM numbers_mt(1e3);
OPTIMIZE TABLE t FINAL;
EXPLAIN PIPELINE
SELECT a
FROM t
GROUP BY a
FORMAT PrettySpace
SETTINGS optimize_aggregation_in_order = 1;
DROP TABLE t;