mirror of https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00

Merge remote-tracking branch 'origin/master' into ADQM-940

This commit is contained in:
commit d4d381de7e

3 .gitmodules vendored
@ -340,3 +340,6 @@
[submodule "contrib/c-ares"]
	path = contrib/c-ares
	url = https://github.com/c-ares/c-ares.git
[submodule "contrib/incbin"]
	path = contrib/incbin
	url = https://github.com/graphitemaster/incbin.git
@ -1,58 +0,0 @@
# Embed a set of resource files into a resulting object file.
#
# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
#
# This will generate a static library target named `<target>`, which contains the contents of
# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
#
# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
# These are:
# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
# 3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
#
# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
macro(clickhouse_embed_binaries)
    set(one_value_args TARGET RESOURCE_DIR)
    set(resources RESOURCES)
    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})

    if (NOT DEFINED EMBED_TARGET)
        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
    endif()

    if (NOT DEFINED EMBED_RESOURCE_DIR)
        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()

    list(LENGTH EMBED_RESOURCES N_RESOURCES)
    if (N_RESOURCES LESS 1)
        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
    endif()

    add_library("${EMBED_TARGET}" STATIC)
    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)

    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")

    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
        set(BINARY_FILE_NAME "${RESOURCE_FILE}")

        # Normalize the name of the resource.
        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")

        # Generate the configured assembly file in the output directory.
        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)

        # Set the include directory for relative paths specified for `.incbin` directive.
        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")

        target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
        set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
    endforeach()
endmacro()
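For context, a minimal sketch of how the `_binary_<name>_start`/`_binary_<name>_end` symbols produced by this (now removed) macro were consumed from C++. The resource name `config_xml` is illustrative, not taken from the diff:

```cpp
#include <string_view>

// Boundary symbols emitted by the embedding step for a resource named
// "config.xml", normalized to "config_xml" per the scheme documented above.
extern "C" const char _binary_config_xml_start[];
extern "C" const char _binary_config_xml_end[];

// Reconstruct the embedded blob from its boundary symbols.
std::string_view getEmbeddedConfigXml()
{
    return {_binary_config_xml_start,
            static_cast<size_t>(_binary_config_xml_end - _binary_config_xml_start)};
}
```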
2 contrib/CMakeLists.txt vendored
@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin)

option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)
    add_contrib (libstemmer-c-cmake libstemmer_c)
    add_contrib (wordnet-blast-cmake wordnet-blast)
    add_contrib (lemmagen-c-cmake lemmagen-c)
    add_contrib (nlp-data-cmake nlp-data)
    add_contrib (cld2-cmake cld2)
endif()
@ -1,4 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")

set (SRCS
@ -23,12 +22,10 @@ if (OS_FREEBSD)
endif ()

# Related to the time_zones table:
# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build.
# The data in this file will be used to populate the system.time_zones table; this is specific to OS_LINUX,
# as the library that's built using embedded tzdata is also specific to OS_LINUX.
set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
# TimeZones.generated.cpp is autogenerated each time during a build.
set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
# Remove existing copies so that they are generated fresh on each build.
file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
file(REMOVE ${TIMEZONES_FILE})

# Get the list of timezones from the tzdata shipped with cctz.
set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")

set(TIMEZONE_RESOURCE_FILES)

# Each file in that dir (except "*.tab" and "localtime") stores the info about one timezone.
execute_process(COMMAND
    bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
    OUTPUT_STRIP_TRAILING_WHITESPACE
    OUTPUT_VARIABLE TIMEZONES)

file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")

set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
    file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
    MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)

file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )

foreach(TIMEZONE ${TIMEZONES})
    file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n")
    list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
    file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n")
    MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)
file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n")
clickhouse_embed_binaries(
    TARGET tzdata
    RESOURCE_DIR "${TZDIR}"
    RESOURCES ${TIMEZONE_RESOURCE_FILES}
)
add_dependencies(_cctz tzdata)
target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")

file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n")

file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )

set (COUNTER 1)
foreach(TIMEZONE ${TIMEZONES})
    file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
    MATH(EXPR COUNTER "${COUNTER}+1")
endforeach(TIMEZONE)

file(APPEND ${TIMEZONES_FILE} " return {};\n")
file(APPEND ${TIMEZONES_FILE} "}\n")

add_library (tzdata ${TIMEZONES_FILE})
target_link_libraries(tzdata ch_contrib::incbin)
target_link_libraries(_cctz tzdata)

add_library(ch_contrib::cctz ALIAS _cctz)
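To make the generation loop above concrete, here is a hedged sketch of what the generated TimeZones.generated.cpp would contain for a single entry. "Europe/Amsterdam" and the "/path/to/" prefix are illustrative; the real file enumerates every file under testdata/zoneinfo:

```cpp
// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt
#include <incbin.h>
// Embeds the tzdata file; with incbin's default "g" prefix this defines
// gresource_timezone1Data and gresource_timezone1Size.
INCBIN(resource_timezone1, "/path/to/contrib/cctz/testdata/zoneinfo/Europe/Amsterdam");

const char * auto_time_zones[] {
    "Europe/Amsterdam",
    nullptr
};

#include <string_view>

std::string_view getTimeZone(const char * name)
{
    if (std::string_view("Europe/Amsterdam") == name)
        return { reinterpret_cast<const char *>(gresource_timezone1Data), gresource_timezone1Size };
    return {};
}
```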
1 contrib/incbin vendored Submodule
@ -0,0 +1 @@
Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
8 contrib/incbin-cmake/CMakeLists.txt Normal file
@ -0,0 +1,8 @@
set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
add_library(_incbin INTERFACE)
target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
add_library(ch_contrib::incbin ALIAS _incbin)

# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
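As a quick orientation for readers unfamiliar with incbin: a self-contained usage sketch. The file name "example.bin" is illustrative; with incbin's default "g" prefix, INCBIN(example, ...) defines gexampleData, gexampleEnd and gexampleSize:

```cpp
#include <cstdio>
#include <incbin.h>

// Embed example.bin into the binary at compile time.
INCBIN(example, "example.bin");

int main()
{
    // gexampleData points at the embedded bytes; gexampleSize is their count.
    std::printf("embedded %u bytes, first byte: 0x%02x\n",
                gexampleSize, static_cast<unsigned>(gexampleData[0]));
    return 0;
}
```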
@ -1,15 +0,0 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)

set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data")

add_library (_nlp_data INTERFACE)

clickhouse_embed_binaries(
    TARGET nlp_dictionaries
    RESOURCE_DIR "${LIBRARY_DIR}"
    RESOURCES charset.zst tonality_ru.zst programming.zst
)

add_dependencies(_nlp_data nlp_dictionaries)
target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}")
add_library(ch_contrib::nlp_data ALIAS _nlp_data)
@ -147,6 +147,7 @@ function clone_submodules
    contrib/simdjson
    contrib/liburing
    contrib/libfiu
    contrib/incbin
)

git submodule sync
@ -4,6 +4,9 @@
set -e -x -a

# Choose a random timezone for this test run.
#
# NOTE: clickhouse-test will randomize session_timezone by itself as well
# (it will choose between the default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Chosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone
@ -945,44 +945,6 @@ Result:
└────────────┴───────┘
```

## toDecimalString

Converts a numeric value to a String with the number of fractional digits in the output specified by the user.

**Syntax**

``` sql
toDecimalString(number, scale)
```

**Parameters**

- `number` — Value to be represented as a String: [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md).
- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md).
    * The maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal);
    * The maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60.

**Returned value**

- The input value represented as a [String](/docs/en/sql-reference/data-types/string.md) with the given number of fractional digits (scale).
    The number is rounded up or down according to common arithmetic rules if the requested scale is smaller than the original number's scale.

**Example**

Query:

``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```

Result:

```response
┌─toDecimalString(CAST('64.32', 'Float64'), 5)─┐
│ 64.32000                                     │
└──────────────────────────────────────────────┘
```

## reinterpretAsUInt(8\|16\|32\|64)

## reinterpretAsInt(8\|16\|32\|64)
@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
└────────────┴───────┘
```

## toDecimalString

Accepts any numeric type as the first argument and returns a string with the decimal representation of the number, with the precision given by the second argument.

**Syntax**

``` sql
toDecimalString(number, scale)
```

**Parameters**

- `number` — A value of any numeric type: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md).
- `scale` — The requested number of fractional digits, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md).
    * For the [Decimal](/docs/ru/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) types, `scale` must not exceed 77 (this is the largest possible number of significant digits for these types);
    * For the [Float](/docs/ru/sql-reference/data-types/float.md) type, `scale` must not exceed 60.

**Returned value**

- A string ([String](/docs/en/sql-reference/data-types/string.md)) with the decimal representation of the input number, with the given length of the fractional part.
    The number is rounded according to common arithmetic rules when necessary.

**Usage example**

Query:

``` sql
SELECT toDecimalString(CAST('64.32', 'Float64'), 5);
```

Result:

```response
┌─toDecimalString(CAST('64.32', 'Float64'), 5)─┐
│ 64.32000                                     │
└──────────────────────────────────────────────┘
```

## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}

## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
@ -20,10 +20,7 @@
#include <Common/formatReadable.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/OpenSSLHelpers.h>
#include <base/hex.h>
#include <Common/getResource.h>
#include <base/sleep.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
@ -35,6 +32,14 @@

#include <Poco/Util/XMLConfiguration.h>

#include <incbin.h>

#include "config.h"

/// Embedded configuration files used inside the install program
INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml");
INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml");


/** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary.
  * It also allows to avoid dependency on systemd, upstart, SysV init.
@ -560,7 +565,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)

    if (!fs::exists(main_config_file))
    {
        std::string_view main_config_content = getResource("config.xml");
        std::string_view main_config_content(reinterpret_cast<const char *>(gresource_config_xmlData), gresource_config_xmlSize);
        if (main_config_content.empty())
        {
            fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string());
@ -672,7 +677,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)

    if (!fs::exists(users_config_file))
    {
        std::string_view users_config_content = getResource("users.xml");
        std::string_view users_config_content(reinterpret_cast<const char *>(gresource_users_xmlData), gresource_users_xmlSize);
        if (users_config_content.empty())
        {
            fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string());
@ -1,16 +1,3 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)

if (OS_LINUX)
    set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
    # for some reason INTERFACE linkage doesn't work for standalone binary
    set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_keeper_configs> -Wl,${NO_WHOLE_ARCHIVE}")
endif ()

clickhouse_embed_binaries(
    TARGET clickhouse_keeper_configs
    RESOURCES keeper_config.xml keeper_embedded.xml
)

set(CLICKHOUSE_KEEPER_SOURCES
    Keeper.cpp
)
@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_program_add(keeper)

install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)

if (BUILD_STANDALONE_KEEPER)
    # Straight list of all required sources
@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER)
        ${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
    )

    add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
    set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)

    if (SPLIT_DEBUG_SYMBOLS)
@ -457,8 +457,10 @@ try
    const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");

    std::vector<std::string> extra_paths = {include_from_path};
    if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
    if (!key_path.empty()) extra_paths.emplace_back(key_path);
    if (!cert_path.empty())
        extra_paths.emplace_back(cert_path);
    if (!key_path.empty())
        extra_paths.emplace_back(key_path);

    /// ConfigReloader have to strict parameters which are redundant in our case
    auto main_config_reloader = std::make_unique<ConfigReloader>(
@ -1,12 +1,8 @@
include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)

set(CLICKHOUSE_SERVER_SOURCES
    MetricsTransmitter.cpp
    Server.cpp
)

set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:clickhouse_server_configs> -Wl,${NO_WHOLE_ARCHIVE}")

set (CLICKHOUSE_SERVER_LINK
    PRIVATE
        clickhouse_aggregate_functions
@ -34,9 +30,3 @@ endif()
clickhouse_program_add(server)

install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse)

clickhouse_embed_binaries(
    TARGET clickhouse_server_configs
    RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js
)
add_dependencies(clickhouse-server-lib clickhouse_server_configs)
@ -128,6 +128,10 @@
#   include <azure/storage/common/internal/xml_wrapper.hpp>
#endif

#include <incbin.h>
/// A minimal file used when the server is run without installation
INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml");

namespace CurrentMetrics
{
    extern const Metric Revision;
@ -393,6 +397,7 @@ int Server::run()

void Server::initialize(Poco::Util::Application & self)
{
    ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));
    BaseDaemon::initialize(self);
    logger().information("starting up");
@ -1106,8 +1111,10 @@ try
    const std::string key_path = config().getString("openSSL.server.privateKeyFile", "");

    std::vector<std::string> extra_paths = {include_from_path};
    if (!cert_path.empty()) extra_paths.emplace_back(cert_path);
    if (!key_path.empty()) extra_paths.emplace_back(key_path);
    if (!cert_path.empty())
        extra_paths.emplace_back(cert_path);
    if (!key_path.empty())
        extra_paths.emplace_back(key_path);

    auto main_config_reloader = std::make_unique<ConfigReloader>(
        config_path,
0 programs/server/resources.cpp Normal file
@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc)
    target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
endif()

target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash ch_contrib::incbin)

add_subdirectory(Access/Common)
add_subdirectory(Common/ZooKeeper)
@ -561,7 +561,6 @@ if (ENABLE_NLP)
    dbms_target_link_libraries (PUBLIC ch_contrib::stemmer)
    dbms_target_link_libraries (PUBLIC ch_contrib::wnb)
    dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen)
    dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data)
endif()

if (TARGET ch_contrib::ulid)
@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES)
endif()

if (ENABLE_MYSQL)
    add_subdirectory (mysqlxx)
    add_subdirectory(mysqlxx)
endif ()
@ -19,7 +19,6 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/getResource.h>
#include <Common/XMLUtils.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
@ -83,6 +82,13 @@ ConfigProcessor::~ConfigProcessor()
    Poco::Logger::destroy("ConfigProcessor");
}

static std::unordered_map<std::string, std::string_view> embedded_configs;

void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content)
{
    embedded_configs[name] = content;
}


/// Vector containing the name of the element and a sorted list of attribute names and values
/// (except "remove" and "replace" attributes).
@ -281,15 +287,15 @@ void ConfigProcessor::doIncludesRecursive(
    {
        std::string value = node->nodeValue();

        bool replace_occured = false;
        bool replace_occurred = false;
        size_t pos;
        while ((pos = value.find(substitution.first)) != std::string::npos)
        {
            value.replace(pos, substitution.first.length(), substitution.second);
            replace_occured = true;
            replace_occurred = true;
        }

        if (replace_occured)
        if (replace_occurred)
            node->setNodeValue(value);
    }
}
@ -528,26 +534,14 @@ XMLDocumentPtr ConfigProcessor::processConfig(
    }
    else
    {
        /// These embedded files added during build with some cmake magic.
        /// Look at the end of programs/server/CMakeLists.txt.
        std::string embedded_name;
        if (path == "config.xml")
            embedded_name = "embedded.xml";

        if (path == "keeper_config.xml")
            embedded_name = "keeper_embedded.xml";

        /// When we can use config embedded in binary.
        if (!embedded_name.empty())
        /// When we can use a config embedded in the binary.
        if (auto it = embedded_configs.find(path); it != embedded_configs.end())
        {
            auto resource = getResource(embedded_name);
            if (resource.empty())
                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
            LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
            config = dom_parser.parseMemory(resource.data(), resource.size());
            config = dom_parser.parseMemory(it->second.data(), it->second.size());
        }
        else
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
    }

    std::vector<std::string> contributing_files;
@ -65,6 +65,9 @@ public:
        zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
        const zkutil::EventPtr & zk_changed_event = nullptr);

    /// These configurations will be used if there is no configuration file.
    static void registerEmbeddedConfig(std::string name, std::string_view content);


    /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
    /// The resulting XML document is saved into a file with the name
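A hedged sketch of how this new registration hook fits together, mirroring the Server::initialize() change earlier in this diff (the keeper binary is assumed to register keeper_config.xml analogously):

```cpp
// At startup, the binary registers its embedded fallback config...
ConfigProcessor::registerEmbeddedConfig(
    "config.xml",
    std::string_view(reinterpret_cast<const char *>(gresource_embedded_xmlData), gresource_embedded_xmlSize));

// ...and ConfigProcessor::processConfig() later serves it when no
// config.xml exists on disk, replacing the old getResource() lookup.
```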
@ -3,7 +3,6 @@
#include <cctz/civil_time.h>
#include <cctz/time_zone.h>
#include <cctz/zone_info_source.h>
#include <Common/getResource.h>
#include <Poco/Exception.h>

#include <algorithm>
@ -11,6 +10,11 @@
#include <chrono>
#include <cstring>
#include <memory>
#include <iostream>


/// Embedded timezones.
std::string_view getTimeZone(const char * name);


namespace
@ -249,9 +253,10 @@ namespace cctz_extension
        const std::string & name,
        const std::function<std::unique_ptr<cctz::ZoneInfoSource>(const std::string & name)> & fallback)
    {
        std::string_view resource = getResource(name);
        if (!resource.empty())
            return std::make_unique<Source>(resource.data(), resource.size());
        std::string_view tz_file = getTimeZone(name.data());

        if (!tz_file.empty())
            return std::make_unique<Source>(tz_file.data(), tz_file.size());

        return fallback(name);
    }
185 src/Common/FrequencyHolder.cpp Normal file
@ -0,0 +1,185 @@
#include <Common/FrequencyHolder.h>

#if USE_NLP

#include <incbin.h>

/// Embedded SQL definitions
INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst");
INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst");
INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst");


namespace DB
{

namespace ErrorCodes
{
    extern const int FILE_DOESNT_EXIST;
}


FrequencyHolder & FrequencyHolder::getInstance()
{
    static FrequencyHolder instance;
    return instance;
}

FrequencyHolder::FrequencyHolder()
{
    loadEmotionalDict();
    loadEncodingsFrequency();
    loadProgrammingFrequency();
}

void FrequencyHolder::loadEncodingsFrequency()
{
    Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");

    LOG_TRACE(log, "Loading embedded charset frequencies");

    std::string_view resource(reinterpret_cast<const char *>(gresource_charset_zstData), gresource_charset_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");

    String line;
    UInt16 bigram;
    Float64 frequency;
    String charset_name;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        // Start loading a new charset
        if (line.starts_with("// "))
        {
            // Skip "// "
            buf_line.ignore(3);
            readString(charset_name, buf_line);

            /* In our dictionary we have lines with form: <Language>_<Charset>
             * If we need to find language of data, we return <Language>
             * If we need to find charset of data, we return <Charset>.
             */
            size_t sep = charset_name.find('_');

            Encoding enc;
            enc.lang = charset_name.substr(0, sep);
            enc.name = charset_name.substr(sep + 1);
            encodings_freq.push_back(std::move(enc));
        }
        else
        {
            readIntText(bigram, buf_line);
            buf_line.ignore();
            readFloatText(frequency, buf_line);

            encodings_freq.back().map[bigram] = frequency;
        }
    }
    LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
}

void FrequencyHolder::loadEmotionalDict()
{
    Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
    LOG_TRACE(log, "Loading embedded emotional dictionary");

    std::string_view resource(reinterpret_cast<const char *>(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");

    String line;
    String word;
    Float64 tonality;
    size_t count = 0;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        readStringUntilWhitespace(word, buf_line);
        buf_line.ignore();
        readFloatText(tonality, buf_line);

        StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
        emotional_dict[ref] = tonality;
        ++count;
    }
    LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
}

void FrequencyHolder::loadProgrammingFrequency()
{
    Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");

    LOG_TRACE(log, "Loading embedded programming languages frequencies loading");

    std::string_view resource(reinterpret_cast<const char *>(gresource_programming_zstData), gresource_programming_zstSize);
    if (resource.empty())
        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");

    String line;
    String bigram;
    Float64 frequency;
    String programming_language;

    auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
    ZstdInflatingReadBuffer in(std::move(buf));

    while (!in.eof())
    {
        readString(line, in);
        in.ignore();

        if (line.empty())
            continue;

        ReadBufferFromString buf_line(line);

        // Start loading a new language
        if (line.starts_with("// "))
        {
            // Skip "// "
            buf_line.ignore(3);
            readString(programming_language, buf_line);

            Language lang;
            lang.name = programming_language;
            programming_freq.push_back(std::move(lang));
        }
        else
        {
            readStringUntilWhitespace(bigram, buf_line);
            buf_line.ignore();
            readFloatText(frequency, buf_line);

            StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
            programming_freq.back().map[ref] = frequency;
        }
    }
    LOG_TRACE(log, "Programming languages frequencies was added");
}

}

#endif
@ -1,5 +1,9 @@
#pragma once

#include "config.h"

#if USE_NLP

#include <base/StringRef.h>
#include <Common/logger_useful.h>

@ -7,7 +11,6 @@
#include <unordered_map>

#include <Common/Arena.h>
#include <Common/getResource.h>
#include <Common/HashTable/HashMap.h>
#include <Common/StringUtils/StringUtils.h>
#include <IO/ReadBufferFromFile.h>
@ -20,11 +23,6 @@
namespace DB
{

namespace ErrorCodes
{
    extern const int FILE_DOESNT_EXIST;
}

/// FrequencyHolder class is responsible for storing and loading dictionaries
/// needed for text classification functions:
///
@ -56,11 +54,7 @@ public:
    using EncodingMap = HashMap<UInt16, Float64>;
    using EncodingContainer = std::vector<Encoding>;

    static FrequencyHolder & getInstance()
    {
        static FrequencyHolder instance;
        return instance;
    }
    static FrequencyHolder & getInstance();

    const Map & getEmotionalDict() const
    {
@ -78,161 +72,11 @@ public:
    }

private:
    FrequencyHolder();

    FrequencyHolder()
    {
        loadEmotionalDict();
        loadEncodingsFrequency();
        loadProgrammingFrequency();
    }

    void loadEncodingsFrequency()
    {
        Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");

        LOG_TRACE(log, "Loading embedded charset frequencies");

        auto resource = getResource("charset.zst");
        if (resource.empty())
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");

        String line;
        UInt16 bigram;
        Float64 frequency;
        String charset_name;

        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
        ZstdInflatingReadBuffer in(std::move(buf));

        while (!in.eof())
        {
            readString(line, in);
            in.ignore();

            if (line.empty())
                continue;

            ReadBufferFromString buf_line(line);

            // Start loading a new charset
            if (line.starts_with("// "))
            {
                // Skip "// "
                buf_line.ignore(3);
                readString(charset_name, buf_line);

                /* In our dictionary we have lines with form: <Language>_<Charset>
                 * If we need to find language of data, we return <Language>
                 * If we need to find charset of data, we return <Charset>.
                 */
                size_t sep = charset_name.find('_');

                Encoding enc;
                enc.lang = charset_name.substr(0, sep);
                enc.name = charset_name.substr(sep + 1);
                encodings_freq.push_back(std::move(enc));
            }
            else
            {
                readIntText(bigram, buf_line);
                buf_line.ignore();
                readFloatText(frequency, buf_line);

                encodings_freq.back().map[bigram] = frequency;
            }
        }
        LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size());
    }

    void loadEmotionalDict()
    {
        Poco::Logger * log = &Poco::Logger::get("EmotionalDict");
        LOG_TRACE(log, "Loading embedded emotional dictionary");

        auto resource = getResource("tonality_ru.zst");
        if (resource.empty())
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary");

        String line;
        String word;
        Float64 tonality;
        size_t count = 0;

        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
        ZstdInflatingReadBuffer in(std::move(buf));

        while (!in.eof())
        {
            readString(line, in);
            in.ignore();

            if (line.empty())
                continue;

            ReadBufferFromString buf_line(line);

            readStringUntilWhitespace(word, buf_line);
            buf_line.ignore();
            readFloatText(tonality, buf_line);

            StringRef ref{string_pool.insert(word.data(), word.size()), word.size()};
            emotional_dict[ref] = tonality;
            ++count;
        }
        LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count));
    }

    void loadProgrammingFrequency()
    {
        Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency");

        LOG_TRACE(log, "Loading embedded programming languages frequencies loading");

        auto resource = getResource("programming.zst");
        if (resource.empty())
            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies");

        String line;
        String bigram;
        Float64 frequency;
        String programming_language;

        auto buf = std::make_unique<ReadBufferFromMemory>(resource.data(), resource.size());
        ZstdInflatingReadBuffer in(std::move(buf));

        while (!in.eof())
        {
            readString(line, in);
            in.ignore();

            if (line.empty())
                continue;

            ReadBufferFromString buf_line(line);

            // Start loading a new language
            if (line.starts_with("// "))
            {
                // Skip "// "
                buf_line.ignore(3);
                readString(programming_language, buf_line);

                Language lang;
                lang.name = programming_language;
                programming_freq.push_back(std::move(lang));
            }
            else
            {
                readStringUntilWhitespace(bigram, buf_line);
                buf_line.ignore();
                readFloatText(frequency, buf_line);

                StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()};
                programming_freq.back().map[ref] = frequency;
            }
        }
        LOG_TRACE(log, "Programming languages frequencies was added");
    }
    void loadEncodingsFrequency();
    void loadEmotionalDict();
    void loadProgrammingFrequency();

    Arena string_pool;

@ -241,3 +85,5 @@ private:
    EncodingContainer encodings_freq;
};
}

#endif
@ -45,6 +45,7 @@
    M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \
    M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \
    M(OpenedFileCacheMisses, "Number of times a file has not been found in the opened file cache, so we had to open it again.") \
    M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \
    M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \
    M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \
    M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \
@ -87,50 +87,13 @@ namespace
/// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object


void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources)
{
    const char * char_address = static_cast<const char *>(address);

    if (name.starts_with("_binary_") || name.starts_with("binary_"))
    {
        if (name.ends_with("_start"))
        {
            name = name.substr((name[0] == '_') + strlen("binary_"));
            name = name.substr(0, name.size() - strlen("_start"));

            auto & resource = resources[name];
            if (!resource.base_address || resource.base_address == base_address)
            {
                resource.base_address = base_address;
                resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
                resource.object_name = object_name;
            }
        }
        if (name.ends_with("_end"))
        {
            name = name.substr((name[0] == '_') + strlen("binary_"));
            name = name.substr(0, name.size() - strlen("_end"));

            auto & resource = resources[name];
            if (!resource.base_address || resource.base_address == base_address)
            {
                resource.base_address = base_address;
                resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor)
                resource.object_name = object_name;
            }
        }
    }
}


/// Based on the code of musl-libc and the answer of Kanalpiroge on
/// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture
/// It does not extract all the symbols (but only public - exported and used for dynamic linking),
/// but will work if we cannot find or parse ELF files.
void collectSymbolsFromProgramHeaders(
    dl_phdr_info * info,
    std::vector<SymbolIndex::Symbol> & symbols,
    SymbolIndex::Resources & resources)
    std::vector<SymbolIndex::Symbol> & symbols)
{
    /* Iterate over all headers of the current shared lib
     * (first call is for the executable itself)
@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders(
                /// We are not interested in empty symbols.
                if (elf_sym[sym_index].st_size)
                    symbols.push_back(symbol);

                /// But resources can be represented by a pair of empty symbols (indicating their boundaries).
                updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources);
            }

            break;
@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable(
    const Elf & elf,
    const Elf::Section & symbol_table,
    const Elf::Section & string_table,
    std::vector<SymbolIndex::Symbol> & symbols,
    SymbolIndex::Resources & resources)
    std::vector<SymbolIndex::Symbol> & symbols)
{
    /// Iterate symbol table.
    const ElfSym * symbol_table_entry = reinterpret_cast<const ElfSym *>(symbol_table.begin());
@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable(

        if (symbol_table_entry->st_size)
            symbols.push_back(symbol);

        updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources);
    }
}

@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
    const Elf & elf,
    unsigned section_header_type,
    const char * string_table_name,
    std::vector<SymbolIndex::Symbol> & symbols,
    SymbolIndex::Resources & resources)
    std::vector<SymbolIndex::Symbol> & symbols)
{
    std::optional<Elf::Section> symbol_table;
    std::optional<Elf::Section> string_table;
@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable(
        return false;
    }

    collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources);
    collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols);
    return true;
}

@ -351,7 +307,6 @@ void collectSymbolsFromELF(
    dl_phdr_info * info,
    std::vector<SymbolIndex::Symbol> & symbols,
    std::vector<SymbolIndex::Object> & objects,
    SymbolIndex::Resources & resources,
    String & build_id)
{
    String object_name;
@ -462,11 +417,11 @@ void collectSymbolsFromELF(
    object.name = object_name;
    objects.push_back(std::move(object));

    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources);
    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols);

    /// Unneeded if they were parsed from "program headers" of loaded objects.
#if defined USE_MUSL
    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources);
    searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols);
#endif
}

@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr)
{
    SymbolIndex::Data & data = *reinterpret_cast<SymbolIndex::Data *>(data_ptr);

    collectSymbolsFromProgramHeaders(info, data.symbols, data.resources);
    collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id);
    collectSymbolsFromProgramHeaders(info, data.symbols);
    collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id);

    /* Continue iterations */
    return 0;
@ -8,6 +8,7 @@
#include <Common/Elf.h>
#include <boost/noncopyable.hpp>


namespace DB
{

@ -45,44 +46,15 @@ public:
    const std::vector<Symbol> & symbols() const { return data.symbols; }
    const std::vector<Object> & objects() const { return data.objects; }

    std::string_view getResource(String name) const
    {
        if (auto it = data.resources.find(name); it != data.resources.end())
            return it->second.data();
        return {};
    }

    /// The BuildID that is generated by compiler.
    String getBuildID() const { return data.build_id; }
    String getBuildIDHex() const;

    struct ResourcesBlob
    {
        /// Symbol can be presented in multiple shared objects,
        /// base_address will be used to compare only symbols from the same SO.
        ElfW(Addr) base_address = 0;
        /// Just a human name of the SO.
        std::string_view object_name;
        /// Data blob.
        std::string_view start;
        std::string_view end;

        std::string_view data() const
        {
            assert(end.data() >= start.data());
            return std::string_view{start.data(), static_cast<size_t>(end.data() - start.data())};
        }
    };
    using Resources = std::unordered_map<std::string_view /* symbol name */, ResourcesBlob>;

    struct Data
    {
        std::vector<Symbol> symbols;
        std::vector<Object> objects;
        String build_id;

        /// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end.
        Resources resources;
    };
private:
    Data data;
@ -59,3 +59,7 @@
#cmakedefine01 USE_ULID
#cmakedefine01 FIU_ENABLE
#cmakedefine01 USE_BCRYPT

/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO.
/// That's why we use absolute paths.
#cmakedefine SOURCE_DIR "@SOURCE_DIR@"
@ -1,52 +0,0 @@
#include "getResource.h"
#include <dlfcn.h>
#include <string>
#include <boost/algorithm/string/replace.hpp>
#include <Common/SymbolIndex.h>


std::string_view getResource(std::string_view name)
{
    // Convert the resource file name into the form generated by `ld -r -b binary`.
    std::string name_replaced(name);
    std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
    std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
    std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
    boost::replace_all(name_replaced, "+", "_PLUS_");

#if defined USE_MUSL
    /// If static linking is used, we cannot use dlsym and have to parse the ELF symbol table by ourselves.
    return DB::SymbolIndex::instance().getResource(name_replaced);

#else
    // In most `dlsym(3)` APIs, one passes the symbol name as it appears via
    // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
    // looked up with the string `"_foo"`.
    //
    // Apple's linker is confusingly different. The NOTES on the man page for
    // `dlsym(3)` claim that one looks up the symbol with "the name used in C
    // source code". In this example, that would mean using the string `"foo"`.
    // This apparently applies even in the case where the symbol did not originate
    // from C source, such as the embedded binary resource files used here. So
    // the symbol name must not have a leading `_` on Apple platforms. It's not
    // clear how this applies to other symbols, such as those which _have_ a leading
    // underscore in them by design, many leading underscores, etc.
#if defined OS_DARWIN
    std::string prefix = "binary_";
#else
    std::string prefix = "_binary_";
#endif
    std::string symbol_name_start = prefix + name_replaced + "_start";
    std::string symbol_name_end = prefix + name_replaced + "_end";

    const char * sym_start = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
    const char * sym_end = reinterpret_cast<const char *>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));

    if (sym_start && sym_end)
    {
        auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
        return { sym_start, resource_size };
    }
    return {};
#endif
}
@ -1,7 +0,0 @@
#pragma once

#include <string_view>

/// Get a resource from the binary if it exists. Otherwise return an empty string view.
/// Resources are data that is embedded into the executable at link time.
std::string_view getResource(std::string_view name);
@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x)
    return res;
}


String backQuoteMySQL(StringRef x)
{
    String res(x.size, '\0');
    {
        WriteBufferFromString wb(res);
        writeBackQuotedStringMySQL(x, wb);
    }
    return res;
}

}
@ -24,4 +24,7 @@ String backQuote(StringRef x);
/// Quote the identifier with backquotes, if required.
String backQuoteIfNeed(StringRef x);

/// Quote the identifier with backquotes, for use in MySQL queries.
String backQuoteMySQL(StringRef x);

}
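A brief, hedged usage sketch of the helper added above; the identifier "some_table" is illustrative:

```cpp
// backQuoteMySQL always backquotes, e.g. for identifiers in generated MySQL queries.
String quoted = backQuoteMySQL("some_table");
// quoted now holds: `some_table`
```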
@ -548,4 +548,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970,
    // {0, 0 + 11 * 3600 * 24 + 12, 11},
    }))
);

@ -38,7 +38,6 @@
#include <base/coverage.h>
#include <base/sleep.h>

#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/ReadHelpers.h>
@ -4,6 +4,7 @@

#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <cstdlib>
#include <random>
#include <string_view>
@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection,
        { std::make_shared<DataTypeString>(), "column_type" }
    };

    const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
        " WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) +
        "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION";
    String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
        " WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION";

    StreamSettings mysql_input_stream_settings(global_settings, false, true);
    auto mysql_source = std::make_unique<MySQLSource>(connection, query, tables_columns_sample_block, mysql_input_stream_settings);
@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_event)
    CurrentThread::QueryScope query_scope(query_context);

    String query = query_event.query;
    tryQuoteUnrecognizedTokens(query, query);
    if (!materialized_tables_list.empty())
    {
        auto table_id = tryParseTableIDFromDDL(query, query_event.schema);
@ -0,0 +1,289 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
|
||||
|
||||
using namespace DB;
|
||||
|
||||
struct TestCase
|
||||
{
|
||||
String query;
|
||||
String res;
|
||||
bool ok;
|
||||
|
||||
TestCase(
|
||||
const String & query_,
|
||||
const String & res_,
|
||||
bool ok_)
|
||||
: query(query_)
|
||||
, res(res_)
|
||||
, ok(ok_)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case)
|
||||
{
|
||||
return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok;
|
||||
}
|
||||
|
||||
class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam<TestCase>
|
||||
{
|
||||
};
|
||||
|
||||
TEST_P(QuoteUnrecognizedTokensTest, escape)
|
||||
{
|
||||
const auto & [query, expected, ok] = GetParam();
|
||||
String actual;
|
||||
bool res = tryQuoteUnrecognizedTokens(query, actual);
|
||||
EXPECT_EQ(ok, res);
|
||||
EXPECT_EQ(expected, actual);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list<TestCase>{
|
||||
{
|
||||
"",
|
        "",
        false
    },
    {
        "test '\"`",
        "",
        false
    },
    {
        "SELECT * FROM db.`table`",
        "",
        false
    },
    {
        "道渠",
        "`道渠`",
        true
    },
    {
        "道",
        "`道`",
        true
    },
    {
        "道道(skip) 道(",
        "`道道`(skip) `道`(",
        true
    },
    {
        "`道渠`",
        "",
        false
    },
    {
        "'道'",
        "",
        false
    },
    {
        "\"道\"",
        "",
        false
    },
    {
        "` 道 test 渠 `",
        "",
        false
    },
    {
        "skip 道 skip 123",
        "skip `道` skip 123",
        true
    },
    {
        "skip 123 `道` skip",
        "",
        false
    },
    {
        "skip `道 skip 123",
        "",
        false
    },
    {
        "skip test道 skip",
        "skip `test道` skip",
        true
    },
    {
        "test道2test",
        "`test道2test`",
        true
    },
    {
        "skip test道2test 123",
        "skip `test道2test` 123",
        true
    },
    {
        "skip 您a您a您a a您a您a您a 1您2您3您4 skip",
        "skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip",
        true
    },
    {
        "skip 您a 您a您a b您2您c您4 skip",
        "skip `您a` `您a您a` `b您2您c您4` skip",
        true
    },
    {
        "123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip",
        "`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip",
        true
    },
    {
        "_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip",
        "`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip",
        true
    },
    {
        "TABLE 您2 您(",
        "TABLE `您2` `您`(",
        true
    },
    {
        "TABLE 您.a您2(日2日2 INT",
        "TABLE `您`.`a您2`(`日2日2` INT",
        true
    },
    {
        "TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)",
        "TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)",
        true
    },
    {
        "TABLE 您a日.您a您a您a(test INT",
        "TABLE `您a日`.`您a您a您a`(test INT",
        true
    },
    {
        "TABLE 您a日.您a您a您a(Hi您Hi好Hi INT",
        "TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT",
        true
    },
    {
        "--TABLE 您a日.您a您a您a(test INT",
        "",
        false
    },
    {
        "--您a日.您a您a您a(\n您Hi好",
        "--您a日.您a您a您a(\n`您Hi好`",
        true
    },
    {
        " /* TABLE 您a日.您a您a您a(test INT",
        "",
        false
    },
    {
        "/*您a日.您a您a您a(*/\n您Hi好",
        "/*您a日.您a您a您a(*/\n`您Hi好`",
        true
    },
    {
        " 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a",
        " `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`",
        true
    },
    //{ TODO
    //    "TABLE 您2.您a您a您a(test INT",
    //    "TABLE `您2`.`您a您a您a`(test INT",
    //    true
    //},
    {
        "skip 您a您a您a skip",
        "skip `您a您a您a` skip",
        true
    },
    {
        "test 您a2您3a您a 4 again",
        "test `您a2您3a您a` 4 again",
        true
    },
    {
        "CREATE TABLE db.`道渠`",
        "",
        false
    },
    {
        "CREATE TABLE db.`道渠",
        "",
        false
    },
    {
        "CREATE TABLE db.道渠",
        "CREATE TABLE db.`道渠`",
        true
    },
    {
        "CREATE TABLE db. 道渠",
        "CREATE TABLE db. `道渠`",
        true
    },
    {
        R"sql(
CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL,
您 INT,
道渠 DATETIME,
您test INT, test您 INT, test您test INT,
道渠test INT, test道渠 INT, test道渠test INT,
您_ INT, _您 INT, _您_ INT,
您您__ INT, __您您 INT, __您您__ INT,
您2 INT, 2您 INT, 2您2 INT,
您您22 INT, 22您您 INT, 22您您22 INT,
您_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT,
您您__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT,
您2_ INT, 2_您 INT, 2_您2_ INT,
您您22__ INT, 22__您您 INT, 22__您您22__ INT,
您_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT,
您您_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT,
您test3 INT, test3您 INT, test3您test3 INT, test3您3test INT,
您您test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT,
您3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT,
您您3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT,
您_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT,
您您_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT,
您_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT,
test_日期 varchar(256), test_道_2 varchar(256) NOT NULL ,
test_道渠您_3
BIGINT NOT NULL,
道您3_test INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
        R"sql(
CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL,
`您` INT,
`道渠` DATETIME,
`您test` INT, `test您` INT, `test您test` INT,
`道渠test` INT, `test道渠` INT, `test道渠test` INT,
`您_` INT, `_您` INT, `_您_` INT,
`您您__` INT, `__您您` INT, `__您您__` INT,
`您2` INT, `2您` INT, `2您2` INT,
`您您22` INT, `22您您` INT, `22您您22` INT,
`您_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT,
`您您__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT,
`您2_` INT, `2_您` INT, `2_您2_` INT,
`您您22__` INT, `22__您您` INT, `22__您您22__` INT,
`您_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT,
`您您_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT,
`您test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT,
`您您test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT,
`您3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT,
`您您3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT,
`您_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT,
`您您_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT,
`您_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT,
`test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL ,
`test_道渠您_3`
BIGINT NOT NULL,
`道您3_test` INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
        true
    },
}));
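A note for readers: the table above is a list of (input, expected rewrite, rewritten?) cases. A minimal sketch of the parameterized harness such a table typically feeds; the gtest wiring shown here is an assumption, since the suite boilerplate sits above this excerpt:

#include <gtest/gtest.h>
#include <tuple>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>

/// (input query, expected rewritten query, whether a rewrite should happen)
using QuoteCase = std::tuple<DB::String, DB::String, bool>;

class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam<QuoteCase> {};

TEST_P(QuoteUnrecognizedTokensTest, RewritesOnlyUnrecognizedTokens)
{
    const auto & [input, expected, should_rewrite] = GetParam();
    DB::String res;
    EXPECT_EQ(DB::tryQuoteUnrecognizedTokens(input, res), should_rewrite);
    /// When nothing is rewritten, `res` stays empty, matching the "" entries above.
    EXPECT_EQ(res, expected);
}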
96
src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp
Normal file
@ -0,0 +1,96 @@
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <Parsers/CommonParsers.h>
#include <Common/quoteString.h>

namespace DB
{

/// Checks that there are no tokens (e.g. whitespace) between the current and previous positions
static bool noWhitespaces(const char * to, const char * from)
{
    return static_cast<size_t>(from - to) == 0;
}

/// Checks whether the token should be quoted together with the unrecognized one
static bool isWordOrNumber(TokenType type)
{
    return type == TokenType::BareWord || type == TokenType::Number;
}

static void quoteLiteral(
    IParser::Pos & pos,
    IParser::Pos & pos_prev,
    const char *& pos_unrecognized,
    const char *& copy_from,
    String & rewritten_query)
{
    /// Also copy whitespace, if any
    const auto * end =
        isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin)
            ? pos->end
            : pos_prev->end;
    String literal(pos_unrecognized, static_cast<size_t>(end - pos_unrecognized));
    rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal));
    copy_from = end;
}

bool tryQuoteUnrecognizedTokens(const String & query, String & res)
{
    Tokens tokens(query.data(), query.data() + query.size());
    IParser::Pos pos(tokens, 0);
    Expected expected;
    String rewritten_query;
    const char * copy_from = query.data();
    auto pos_prev = pos;
    const char * pos_unrecognized = nullptr;
    for (; pos->type != TokenType::EndOfStream; ++pos)
    {
        /// Commit the quoting if whitespace is found or the token is not a word or number
        bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type));
        if (pos_unrecognized && commit)
        {
            quoteLiteral(
                pos,
                pos_prev,
                pos_unrecognized,
                copy_from,
                rewritten_query);
            pos_unrecognized = nullptr;
        }
        if (pos->type == TokenType::Error)
        {
            /// Remember the first appearance of the error token
            if (!pos_unrecognized)
            {
                pos_unrecognized =
                    isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin)
                        ? pos_prev->begin
                        : pos->begin;
            }
        }
        pos_prev = pos;
    }

    /// EndOfStream was reached with an uncommitted unrecognized token
    if (pos_unrecognized)
    {
        quoteLiteral(
            pos,
            pos_prev,
            pos_unrecognized,
            copy_from,
            rewritten_query);
        pos_unrecognized = nullptr;
    }

    /// If no error tokens were found
    if (copy_from == query.data())
        return false;

    auto size = static_cast<size_t>(pos->end - copy_from);
    rewritten_query.append(copy_from, size);
    res = rewritten_query;
    return true;
}

}
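Taken together with the test table, the helper's contract is: back-quote runs of tokens the lexer could not recognize (plus the word/number tokens glued to them), and report whether anything changed. A hypothetical standalone call, assuming the translation unit links against the ClickHouse parser library:

#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <iostream>

int main()
{
    DB::String res;

    /// An unquoted non-ASCII identifier gets back-quoted:
    if (DB::tryQuoteUnrecognizedTokens("CREATE TABLE db.道渠", res))
        std::cout << res << '\n';   /// CREATE TABLE db.`道渠`

    /// Already-quoted identifiers are recognized tokens, so nothing is rewritten:
    res.clear();
    bool rewritten = DB::tryQuoteUnrecognizedTokens("CREATE TABLE db.`道渠`", res);
    std::cout << rewritten << '\n'; /// 0
}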
10
src/Databases/MySQL/tryQuoteUnrecognizedTokens.h
Normal file
@ -0,0 +1,10 @@
#pragma once

#include <base/types.h>

namespace DB
{

bool tryQuoteUnrecognizedTokens(const String & query, String & res);

}
@ -1,22 +0,0 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionToDecimalString.h>
#include <Functions/IFunction.h>

namespace DB
{

REGISTER_FUNCTION(ToDecimalString)
{
    factory.registerFunction<FunctionToDecimalString>(
        FunctionDocumentation{
            .description=R"(
Returns a string representation of a number. The first argument is a number of any numeric type,
the second argument is the desired number of digits in the fractional part. Returns String.

)",
            .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}},
            .categories{"String"}
        }, FunctionFactory::CaseInsensitive);
}

}
@ -1,312 +0,0 @@
#pragma once

#include <Core/Types.h>
#include <Core/DecimalFunctions.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
    extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER;
}

class FunctionToDecimalString : public IFunction
{
public:
    static constexpr auto name = "toDecimalString";
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionToDecimalString>(); }

    String getName() const override { return name; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }

    size_t getNumberOfArguments() const override { return 2; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (!isNumber(*arguments[0]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal first argument for formatDecimal function: got {}, expected numeric type",
                arguments[0]->getName());

        if (!isUInt8(*arguments[1]))
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal second argument for formatDecimal function: got {}, expected UInt8",
                arguments[1]->getName());

        return std::make_shared<DataTypeString>();
    }

    bool useDefaultImplementationForConstants() const override { return true; }

private:
    /// For operations with Integer/Float
    template <typename FromVectorType>
    void vectorConstant(const FromVectorType & vec_from, UInt8 precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
    {
        size_t input_rows_count = vec_from.size();
        result_offsets.resize(input_rows_count);

        /// A buffer is used here and in the functions below because the resulting size cannot be precisely anticipated,
        /// and the buffer resizes on the go. Also, the .count() provided by the buffer is convenient in this case.
        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            format(vec_from[i], buf_to, precision);
            result_offsets[i] = buf_to.count();
        }

        buf_to.finalize();
    }

    template <typename FirstArgVectorType>
    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
    {
        size_t input_rows_count = vec_from.size();
        result_offsets.resize(input_rows_count);

        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);

        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            if (vec_precision[i] > max_digits)
                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                    "Too many fractional digits requested, shall not be more than {}", max_digits);
            format(vec_from[i], buf_to, vec_precision[i]);
            result_offsets[i] = buf_to.count();
        }

        buf_to.finalize();
    }

    template <typename FirstArgType>
    void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const
    {
        size_t input_rows_count = vec_precision.size();
        result_offsets.resize(input_rows_count);

        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);

        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            if (vec_precision[i] > max_digits)
                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                    "Too many fractional digits requested, shall not be more than {}", max_digits);
            format(value_from, buf_to, vec_precision[i]);
            result_offsets[i] = buf_to.count();
        }

        buf_to.finalize();
    }

    /// For operations with Decimal
    template <typename FirstArgVectorType>
    void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
    {
        /// There are no more than 77 significant digits (the maximum length of UInt256), so we can cap it at 77.
        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
        if (precision > max_digits)
            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);

        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);
        size_t input_rows_count = vec_from.size();
        result_offsets.resize(input_rows_count);

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            writeText(vec_from[i], from_scale, buf_to, true, true, precision);
            writeChar(0, buf_to);
            result_offsets[i] = buf_to.count();
        }
        buf_to.finalize();
    }

    template <typename FirstArgVectorType>
    void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector<UInt8>::Container & vec_precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
    {
        size_t input_rows_count = vec_from.size();
        result_offsets.resize(input_rows_count);

        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);

        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            if (vec_precision[i] > max_digits)
                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
            writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]);
            writeChar(0, buf_to);
            result_offsets[i] = buf_to.count();
        }
        buf_to.finalize();
    }

    template <typename FirstArgType>
    void constantVector(const FirstArgType & value_from, const ColumnVector<UInt8>::Container & vec_precision,
        ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const
    {
        size_t input_rows_count = vec_precision.size();
        result_offsets.resize(input_rows_count);

        WriteBufferFromVector<ColumnString::Chars> buf_to(vec_to);

        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;

        for (size_t i = 0; i < input_rows_count; ++i)
        {
            if (vec_precision[i] > max_digits)
                throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                    "Too many fractional digits requested for Decimal, must not be more than {}", max_digits);
            writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]);
            writeChar(0, buf_to);
            result_offsets[i] = buf_to.count();
        }
        buf_to.finalize();
    }

    template <is_floating_point T>
    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
    {
        /// A maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating-point values;
        /// catch this here to give the user a more reasonable error.
        if (precision > 60)
            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision));

        DB::DoubleConverter<false>::BufferType buffer;
        double_conversion::StringBuilder builder{buffer, sizeof(buffer)};

        const auto result = DB::DoubleConverter<false>::instance().ToFixed(value, precision, &builder);

        if (!result)
            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value);

        out.write(buffer, builder.position());
        writeChar(0, out);
    }

    template <is_integer T>
    static void format(T value, DB::WriteBuffer & out, UInt8 precision)
    {
        /// The fractional part of an Integer is just trailing zeros. Cap it at 77 (as with Decimals).
        constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
        if (precision > max_digits)
            throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER,
                "Too many fractional digits requested, shall not be more than {}", max_digits);
        writeText(value, out);
        if (precision > 0) [[likely]]
        {
            writeChar('.', out);
            for (int i = 0; i < precision; ++i)
                writeChar('0', out);
            writeChar(0, out);
        }
    }

public:
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
    {
        switch (arguments[0].type->getTypeId())
        {
            case TypeIndex::UInt8: return executeType<UInt8>(arguments);
            case TypeIndex::UInt16: return executeType<UInt16>(arguments);
            case TypeIndex::UInt32: return executeType<UInt32>(arguments);
            case TypeIndex::UInt64: return executeType<UInt64>(arguments);
            case TypeIndex::UInt128: return executeType<UInt128>(arguments);
            case TypeIndex::UInt256: return executeType<UInt256>(arguments);
            case TypeIndex::Int8: return executeType<Int8>(arguments);
            case TypeIndex::Int16: return executeType<Int16>(arguments);
            case TypeIndex::Int32: return executeType<Int32>(arguments);
            case TypeIndex::Int64: return executeType<Int64>(arguments);
            case TypeIndex::Int128: return executeType<Int128>(arguments);
            case TypeIndex::Int256: return executeType<Int256>(arguments);
            case TypeIndex::Float32: return executeType<Float32>(arguments);
            case TypeIndex::Float64: return executeType<Float64>(arguments);
            case TypeIndex::Decimal32: return executeType<Decimal32>(arguments);
            case TypeIndex::Decimal64: return executeType<Decimal64>(arguments);
            case TypeIndex::Decimal128: return executeType<Decimal128>(arguments);
            case TypeIndex::Decimal256: return executeType<Decimal256>(arguments);
            default:
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}",
                    arguments[0].column->getName(), getName());
        }
    }

private:
    template <typename T>
    ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const
    {
        const auto * from_col_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
        const auto * precision_col = checkAndGetColumn<ColumnVector<UInt8>>(arguments[1].column.get());
        const auto * precision_col_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());

        auto result_col = ColumnString::create();
        auto * result_col_string = assert_cast<ColumnString *>(result_col.get());
        ColumnString::Chars & result_chars = result_col_string->getChars();
        ColumnString::Offsets & result_offsets = result_col_string->getOffsets();

        if constexpr (is_decimal<T>)
        {
            const auto * from_col = checkAndGetColumn<ColumnDecimal<T>>(arguments[0].column.get());
            UInt8 from_scale = from_col->getScale();

            if (from_col)
            {
                if (precision_col_const)
                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets, from_scale);
                else
                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale);
            }
            else if (from_col_const)
                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets, from_scale);
            else
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
        }
        else
        {
            const auto * from_col = checkAndGetColumn<ColumnVector<T>>(arguments[0].column.get());
            if (from_col)
            {
                if (precision_col_const)
                    vectorConstant(from_col->getData(), precision_col_const->template getValue<UInt8>(), result_chars, result_offsets);
                else
                    vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets);
            }
            else if (from_col_const)
                constantVector(from_col_const->template getValue<T>(), precision_col->getData(), result_chars, result_offsets);
            else
                throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName());
        }

        return result_col;
    }
};

}
@ -1,9 +1,12 @@
#include <Common/FrequencyHolder.h>

#if USE_NLP

#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>

#include <memory>
#include <unordered_map>


namespace DB
{
@ -46,7 +49,7 @@ namespace
    return res;
}

/// Сount how many times each bigram occurs in the text.
/// Count how many times each bigram occurs in the text.
template <typename ModelMap>
ALWAYS_INLINE inline void calculateStats(
    const UInt8 * data,
@ -150,3 +153,5 @@ REGISTER_FUNCTION(DetectCharset)
}

}

#endif
@ -5,19 +5,17 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Common/isValidUTF8.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
#include <Interpreters/Context.h>

#include <compact_lang_det.h>


namespace DB
{
/* Determine language of Unicode UTF-8 text.
@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>

#if USE_NLP

#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -118,3 +121,5 @@ REGISTER_FUNCTION(DetectProgrammingLanguage)
}

}

#endif
@ -1,4 +1,7 @@
#include <Common/FrequencyHolder.h>

#if USE_NLP

#include <Common/StringUtils/StringUtils.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsTextClassification.h>
@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality)
}

}

#endif
@ -4,14 +4,18 @@
#include <mutex>

#include <Core/Types.h>
#include <Common/ProfileEvents.h>
#include <IO/OpenedFile.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/ProfileEvents.h>

#include <city.h>


namespace ProfileEvents
{
    extern const Event OpenedFileCacheHits;
    extern const Event OpenedFileCacheMisses;
    extern const Event OpenedFileCacheMicroseconds;
}

namespace DB
@ -26,57 +30,79 @@ namespace DB
 */
class OpenedFileCache
{
private:
    using Key = std::pair<std::string /* path */, int /* flags */>;
    class OpenedFileMap
    {
        using Key = std::pair<std::string /* path */, int /* flags */>;

    using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
    using Files = std::map<Key, OpenedFileWeakPtr>;
        using OpenedFileWeakPtr = std::weak_ptr<OpenedFile>;
        using Files = std::map<Key, OpenedFileWeakPtr>;

    Files files;
    std::mutex mutex;
        Files files;
        std::mutex mutex;

    public:
        using OpenedFilePtr = std::shared_ptr<OpenedFile>;

        OpenedFilePtr get(const std::string & path, int flags)
        {
            Key key(path, flags);

            std::lock_guard lock(mutex);

            auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
            if (!inserted)
            {
                if (auto res = it->second.lock())
                {
                    ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
                    return res;
                }
            }
            ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);

            OpenedFilePtr res
            {
                new OpenedFile(path, flags),
                [key, this](auto ptr)
                {
                    {
                        std::lock_guard another_lock(mutex);
                        files.erase(key);
                    }
                    delete ptr;
                }
            };

            it->second = res;
            return res;
        }

        void remove(const std::string & path, int flags)
        {
            Key key(path, flags);
            std::lock_guard lock(mutex);
            files.erase(key);
        }
    };

    static constexpr size_t buckets = 1024;
    std::vector<OpenedFileMap> impls{buckets};

public:
    using OpenedFilePtr = std::shared_ptr<OpenedFile>;
    using OpenedFilePtr = OpenedFileMap::OpenedFilePtr;

    OpenedFilePtr get(const std::string & path, int flags)
    {
        Key key(path, flags);

        std::lock_guard lock(mutex);

        auto [it, inserted] = files.emplace(key, OpenedFilePtr{});
        if (!inserted)
        {
            if (auto res = it->second.lock())
            {
                ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits);
                return res;
            }
        }
        ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses);

        OpenedFilePtr res
        {
            new OpenedFile(path, flags),
            [key, this](auto ptr)
            {
                {
                    std::lock_guard another_lock(mutex);
                    files.erase(key);
                }
                delete ptr;
            }
        };

        it->second = res;
        return res;
        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
        const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
        return impls[bucket].get(path, flags);
    }

    void remove(const std::string & path, int flags)
    {
        Key key(path, flags);
        std::lock_guard lock(mutex);
        files.erase(key);
        ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::OpenedFileCacheMicroseconds);
        const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets;
        impls[bucket].remove(path, flags);
    }

    static OpenedFileCache & instance()
@ -87,5 +113,4 @@ public:
};

using OpenedFileCachePtr = std::shared_ptr<OpenedFileCache>;

}
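The restructuring above shards the cache into 1024 independently locked maps and picks a bucket by CityHash64 of the path, so concurrent get()/remove() calls on different files no longer contend on a single mutex. A standalone sketch of the same idea, with std::hash standing in for CityHash64:

#include <array>
#include <cstdio>
#include <functional>
#include <map>
#include <mutex>
#include <string>

template <typename Value, size_t Buckets = 1024>
class ShardedMap
{
    struct Shard
    {
        std::map<std::string, Value> data;
        std::mutex mutex;
    };
    std::array<Shard, Buckets> shards;

    Shard & shardFor(const std::string & key)
    {
        /// The patch uses CityHash_v1_0_2::CityHash64(path) % buckets instead.
        return shards[std::hash<std::string>{}(key) % Buckets];
    }

public:
    void put(const std::string & key, Value value)
    {
        auto & shard = shardFor(key);
        std::lock_guard lock(shard.mutex);   /// only this bucket is locked
        shard.data[key] = std::move(value);
    }

    bool get(const std::string & key, Value & out)
    {
        auto & shard = shardFor(key);
        std::lock_guard lock(shard.mutex);
        auto it = shard.data.find(key);
        if (it == shard.data.end())
            return false;
        out = it->second;
        return true;
    }
};

int main()
{
    ShardedMap<int> map;
    map.put("/var/lib/data/part.bin", 42);
    int value = 0;
    printf("%d %d\n", map.get("/var/lib/data/part.bin", value), value);   /// 1 42
}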
@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf)
inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); }

template <typename T>
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
    bool fixed_fractional_length, UInt32 fractional_length)
void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
    /// If it's a big integer but the number of digits is small,
    /// use the implementation for smaller integers for more efficient arithmetic.

    if constexpr (std::is_same_v<T, Int256>)
    {
        if (x <= std::numeric_limits<UInt32>::max())
        {
            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
            return;
        }
        else if (x <= std::numeric_limits<UInt64>::max())
        {
            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
            return;
        }
        else if (x <= std::numeric_limits<UInt128>::max())
        {
            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(static_cast<UInt128>(x), scale, ostr, trailing_zeros);
            return;
        }
    }
@ -932,36 +932,24 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
    {
        if (x <= std::numeric_limits<UInt32>::max())
        {
            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(static_cast<UInt32>(x), scale, ostr, trailing_zeros);
            return;
        }
        else if (x <= std::numeric_limits<UInt64>::max())
        {
            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(static_cast<UInt64>(x), scale, ostr, trailing_zeros);
            return;
        }
    }

    constexpr size_t max_digits = std::numeric_limits<UInt256>::digits10;
    assert(scale <= max_digits);
    assert(fractional_length <= max_digits);

    char buf[max_digits];
    memset(buf, '0', std::max(scale, fractional_length));
    memset(buf, '0', scale);

    T value = x;
    Int32 last_nonzero_pos = 0;

    if (fixed_fractional_length && fractional_length < scale)
    {
        T new_value = value / DecimalUtils::scaleMultiplier<Int256>(scale - fractional_length - 1);
        auto round_carry = new_value % 10;
        value = new_value / 10;
        if (round_carry >= 5)
            value += 1;
    }

    for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos)
    for (Int32 pos = scale - 1; pos >= 0; --pos)
    {
        auto remainder = value % 10;
        value /= 10;
@ -973,12 +961,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool
    }

    writeChar('.', ostr);
    ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1));
    ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
}

template <typename T>
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros,
    bool fixed_fractional_length = false, UInt32 fractional_length = 0)
void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros)
{
    T part = DecimalUtils::getWholePart(x, scale);

@ -989,7 +976,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer

    writeIntText(part, ostr);

    if (scale || (fixed_fractional_length && fractional_length > 0))
    if (scale)
    {
        part = DecimalUtils::getFractionalPart(x, scale);
        if (part || trailing_zeros)
@ -997,7 +984,7 @@ void writeText(Decimal<T> x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer
            if (part < 0)
                part *= T(-1);

            writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length);
            writeDecimalFractional(part, scale, ostr, trailing_zeros);
        }
    }
}
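What survives the simplification is the core digit loop: fill a buffer of `scale` zeros from the right, then emit either all of it (trailing_zeros) or only up to the last non-zero digit. A standalone sketch; the last_nonzero bookkeeping is reconstructed from the surrounding hunks, since part of the loop body is outside this diff:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

static std::string fractionalDigits(uint64_t value, int scale, bool trailing_zeros)
{
    char buf[64];
    memset(buf, '0', sizeof(buf));

    int last_nonzero_pos = 0;
    for (int pos = scale - 1; pos >= 0; --pos)
    {
        auto remainder = value % 10;
        value /= 10;

        if (remainder != 0 && last_nonzero_pos == 0)
            last_nonzero_pos = pos;   /// rightmost non-zero digit in the output

        buf[pos] += static_cast<char>(remainder);
    }
    /// Callers guard with `if (scale)` as in the diff, so scale >= 1 here.
    return std::string(buf, trailing_zeros ? scale : last_nonzero_pos + 1);
}

int main()
{
    /// 1.2300 with scale 4: the fractional part is stored as the integer 2300.
    printf("%s\n", fractionalDigits(2300, 4, true).c_str());   /// 2300
    printf("%s\n", fractionalDigits(2300, 4, false).c_str());  /// 23
}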
@ -144,12 +144,6 @@ public:
        UInt32 shard_index_ = 0,
        UInt32 replica_index_ = 0);

    Address(
        const String & host_port_,
        const ClusterConnectionParameters & params,
        UInt32 shard_index_,
        UInt32 replica_index_);

    Address(
        const DatabaseReplicaInfo & info,
        const ClusterConnectionParameters & params,
@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
    {
        remote_shards.emplace_back(Shard{
            .query = query_ast,
            .main_table = main_table,
            .header = header,
            .shard_info = shard_info,
            .lazy = lazy,
@ -50,6 +50,8 @@ public:
    {
        /// Query and header may be changed depending on shard.
        ASTPtr query;
        /// Used to check the table existence on the remote node
        StorageID main_table;
        Block header;

        Cluster::ShardInfo shard_info;
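The new per-shard main_table is optional; later hunks in this diff pick it with a ternary and fall back to the global table (shard.main_table ? shard.main_table : main_table). A standalone toy of the pattern; StorageID's bool conversion is mirrored here by an explicit operator bool:

#include <cstdio>
#include <string>

struct StorageIDToy
{
    std::string name;
    explicit operator bool() const { return !name.empty(); }  /// "is set"
};

int main()
{
    StorageIDToy global{"db.global_table"};
    StorageIDToy per_shard{};  /// empty: not set for this shard

    /// Prefer the per-shard table when present, else the global one.
    const StorageIDToy & effective = per_shard ? per_shard : global;
    printf("%s\n", effective.name.c_str());  /// db.global_table
}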
@ -35,7 +35,12 @@ namespace ErrorCodes
namespace ClusterProxy
{

ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
    ContextPtr context,
    const Settings & settings,
    const StorageID & main_table,
    const SelectQueryInfo * query_info,
    Poco::Logger * log)
{
    Settings new_settings = settings;
    new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
    /// If "secret" (in remote_servers) is not in use,
    /// the user on the shard is not the same as the user on the initiator,
    /// hence per-user limits should not be applied.
    if (cluster.getSecret().empty())
    if (!interserver_mode)
    {
        /// Does not matter on remote servers, because queries are sent under a different user.
        new_settings.max_concurrent_queries_for_user = 0;
@ -170,17 +175,15 @@ void executeQuery(
    std::vector<QueryPlanPtr> plans;
    SelectStreamFactory::Shards remote_shards;

    auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
    auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
    new_context->increaseDistributedDepth();

    size_t shards = query_info.getCluster()->getShardCount();
    for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
    {
        ASTPtr query_ast_for_shard;
        if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
        ASTPtr query_ast_for_shard = query_ast->clone();
        if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
        {
            query_ast_for_shard = query_ast->clone();

            OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
                sharding_key_expr,
                sharding_key_expr->getSampleBlock().getByPosition(0).type,
@ -191,8 +194,6 @@ void executeQuery(
            OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
            visitor.visit(query_ast_for_shard);
        }
        else
            query_ast_for_shard = query_ast->clone();

        if (shard_filter_generator)
        {
@ -34,8 +34,12 @@ class SelectStreamFactory;
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
ContextMutablePtr updateSettingsForCluster(
    const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
    ContextPtr context,
    const Settings & settings,
    const StorageID & main_table,
    const SelectQueryInfo * query_info = nullptr,
    Poco::Logger * log = nullptr);

using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.
@ -2274,8 +2274,7 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
        && !settings.allow_experimental_query_deduplication
        && !settings.empty_result_for_aggregation_by_empty_set
        && storage
        && storage->getName() != "MaterializedMySQL"
        && !storage->hasLightweightDeletedMask()
        && storage->supportsTrivialCountOptimization()
        && query_info.filter_asts.empty()
        && query_analyzer->hasAggregation()
        && (query_analyzer->aggregates().size() == 1)
@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
    }
    else if (const auto * explain_query = ast->as<ASTExplainQuery>())
    {
        if (explain_query->settings_ast)
            InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext();

        applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
    }
    else if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
@ -1900,6 +1900,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
}


bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral)
        return false;
    String s;
    ReadBufferFromMemory in(pos->begin, pos->size());
    try
    {
        if (pos->type == TokenType::StringLiteral)
            readQuotedStringWithSQLStyle(s, in);
        else
            readDoubleQuotedStringWithSQLStyle(s, in);
    }
    catch (const Exception &)
    {
        expected.add(pos, "string literal or double quoted string");
        return false;
    }

    if (in.count() != pos->size())
    {
        expected.add(pos, "string literal or double quoted string");
        return false;
    }

    auto literal = std::make_shared<ASTLiteral>(s);
    literal->begin = pos;
    literal->end = ++pos;
    node = literal;
    return true;
}


bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
    if (pos->type != TokenType::DoubleAt)
@ -367,6 +367,21 @@ protected:
};


/** MySQL comment:
  * CREATE TABLE t (
  *     i INT PRIMARY KEY,
  *     first_name VARCHAR(255) COMMENT 'FIRST_NAME',
  *     last_name VARCHAR(255) COMMENT "LAST_NAME"
  * )
  */
class ParserMySQLComment : public IParserBase
{
protected:
    const char * getName() const override { return "MySQL comment parser"; }
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};


/** MySQL-style global variable: @@var
  */
class ParserMySQLGlobalVariable : public IParserBase
@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node,
        OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
        OptionDescribe("UNIQUE", "unique_key", std::make_unique<ParserAlwaysTrue>()),
        OptionDescribe("KEY", "primary_key", std::make_unique<ParserAlwaysTrue>()),
        OptionDescribe("COMMENT", "comment", std::make_unique<ParserStringLiteral>()),
        OptionDescribe("COMMENT", "comment", std::make_unique<ParserMySQLComment>()),
        OptionDescribe("CHARACTER SET", "charset_name", std::make_unique<ParserCharsetOrCollateName>()),
        OptionDescribe("CHARSET", "charset", std::make_unique<ParserCharsetOrCollateName>()),
        OptionDescribe("COLLATE", "collate", std::make_unique<ParserCharsetOrCollateName>()),
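The practical effect of swapping ParserStringLiteral for ParserMySQLComment: MySQL DDL that uses double-quoted COMMENT clauses no longer fails to parse. An illustrative fragment, taken from the parser's doc comment above (table and column names are made up):

/// Both comment styles are accepted after this change:
const char * mysql_ddl = R"sql(
    CREATE TABLE t (
        i INT PRIMARY KEY,
        first_name VARCHAR(255) COMMENT 'FIRST_NAME',
        last_name VARCHAR(255) COMMENT "LAST_NAME"
    )
)sql";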
@ -3,6 +3,7 @@
#include <Parsers/CommonParsers.h>
#include <Parsers/ParserDescribeTableQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ParserSetQuery.h>

#include <Common/typeid_cast.h>

@ -16,8 +17,10 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
    ParserKeyword s_describe("DESCRIBE");
    ParserKeyword s_desc("DESC");
    ParserKeyword s_table("TABLE");
    ParserKeyword s_settings("SETTINGS");
    ParserToken s_dot(TokenType::Dot);
    ParserIdentifier name_p;
    ParserSetQuery parser_settings(true);

    ASTPtr database;
    ASTPtr table;
@ -29,12 +32,21 @@ bool ParserDescribeTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ex

    s_table.ignore(pos, expected);

    ASTPtr table_expression;
    if (!ParserTableExpression().parse(pos, table_expression, expected))
    if (!ParserTableExpression().parse(pos, query->table_expression, expected))
        return false;

    query->children.push_back(std::move(table_expression));
    query->table_expression = query->children.back();
    /// For compatibility with SELECTs, where SETTINGS can be in front of FORMAT
    ASTPtr settings;
    if (s_settings.ignore(pos, expected))
    {
        if (!parser_settings.parse(pos, query->settings_ast, expected))
            return false;
    }

    query->children.push_back(query->table_expression);

    if (query->settings_ast)
        query->children.push_back(query->settings_ast);

    node = query;

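With this change, DESCRIBE accepts a SETTINGS clause before FORMAT, mirroring SELECT. An illustrative query; the setting shown is only an example:

/// DESCRIBE now parses a trailing SETTINGS clause:
const char * describe_query = R"sql(
    DESCRIBE TABLE db.t SETTINGS describe_include_subcolumns = 1 FORMAT TSV
)sql";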
@ -156,7 +156,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec

    // SETTINGS key1 = value1, key2 = value2, ...
    ParserKeyword s_settings("SETTINGS");
    if (s_settings.ignore(pos, expected))
    if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
    {
        ParserSetQuery parser_settings(true);
        if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
@ -14,8 +14,6 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
{
    ParserKeyword s_exists("EXISTS");
    ParserKeyword s_temporary("TEMPORARY");
    ParserKeyword s_describe("DESCRIBE");
    ParserKeyword s_desc("DESC");
    ParserKeyword s_show("SHOW");
    ParserKeyword s_create("CREATE");
    ParserKeyword s_database("DATABASE");
@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible(
        return false;

    const auto & storage = table_node.getStorage();
    if (!storage->supportsTrivialCountOptimization())
        return false;

    auto storage_id = storage->getStorageID();
    auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(),
        storage_id.getTableName(),
@ -162,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream
        if (my_table_func_ptr)
            try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, &current_settings, PoolMode::GET_MANY);
        else
            try_results = my_shard.shard_info.pool->getManyChecked(timeouts, &current_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName());
            try_results = my_shard.shard_info.pool->getManyChecked(
                timeouts, &current_settings, PoolMode::GET_MANY,
                my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName());
    }
    catch (const Exception & ex)
    {
@ -241,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact
    remote_query_executor->setPoolMode(PoolMode::GET_MANY);

    if (!table_func_ptr)
        remote_query_executor->setMainTable(main_table);
        remote_query_executor->setMainTable(shard.main_table ? shard.main_table : main_table);

    pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending));
    addConvertingActions(pipes.back(), output_stream->header);
@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr<Throttler>;
class ReadFromRemote final : public ISourceStep
{
public:
    /// @param main_table_ if a Shard contains main_table, this parameter will be ignored
    ReadFromRemote(
        ClusterProxy::SelectStreamFactory::Shards shards_,
        Block header_,
@ -6,10 +6,18 @@
#include <Poco/Util/LayeredConfiguration.h>

#include <IO/HTTPCommon.h>
#include <Common/getResource.h>

#include <re2/re2.h>

#include <incbin.h>

#include "config.h"

/// Embedded HTML pages
INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html");
INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html");
INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js");


namespace DB
{
@ -34,13 +42,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
    if (request.getURI().starts_with("/play"))
    {
        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
        *response.send() << getResource("play.html");
        *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_play_htmlData), gresource_play_htmlSize);
    }
    else if (request.getURI().starts_with("/dashboard"))
    {
        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);

        std::string html(getResource("dashboard.html"));
        std::string html(reinterpret_cast<const char *>(gresource_dashboard_htmlData), gresource_dashboard_htmlSize);

        /// Replace the link to the external JavaScript file with the embedded one.
        /// This allows opening the HTML without running a server, and hosting it on a server as well.
@ -55,7 +63,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR
    else if (request.getURI() == "/js/uplot.js")
    {
        response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK);
        *response.send() << getResource("js/uplot.js");
        *response.send() << std::string_view(reinterpret_cast<const char *>(gresource_uplot_jsData), gresource_uplot_jsSize);
    }
    else
    {
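INCBIN (the new submodule from the top of this commit) embeds a file into the binary at compile time and exposes g<name>Data/g<name>Size symbols, which is exactly what the handler above reads. A minimal standalone sketch; the file path is hypothetical and must exist at build time:

#include <cstdio>
#include <string_view>

#include <incbin.h>

INCBIN(hello_txt, "hello.txt");  /// defines ghello_txtData, ghello_txtSize, ghello_txtEnd

int main()
{
    std::string_view contents(reinterpret_cast<const char *>(ghello_txtData), ghello_txtSize);
    printf("%zu bytes: %.*s\n", contents.size(), static_cast<int>(contents.size()), contents.data());
}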
@ -254,6 +254,9 @@ public:
    /// because those are internally translated into 'ALTER UPDATE' mutations.
    virtual bool supportsDelete() const { return false; }

    /// Return true if the trivial count query can be optimized without reading the data at all.
    virtual bool supportsTrivialCountOptimization() const { return false; }

private:

    StorageID storage_id;
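Together with the InterpreterSelectQuery hunk above (which dropped the `getName() != "MaterializedMySQL"` string check), this replaces name-based dispatch with a virtual opt-in. A standalone sketch of the pattern with toy classes, mirroring the overrides that appear in this diff:

#include <cstdio>
#include <memory>
#include <vector>

struct IStorageToy
{
    virtual ~IStorageToy() = default;
    /// Default: do not claim the optimization.
    virtual bool supportsTrivialCountOptimization() const { return false; }
};

/// MergeTree-like storage opts in unless lightweight deletes are present
/// (mirrors the !hasLightweightDeletedMask() override below).
struct MergeTreeToy : IStorageToy
{
    bool has_lightweight_deletes = false;
    bool supportsTrivialCountOptimization() const override { return !has_lightweight_deletes; }
};

/// MaterializedMySQL-like storage stays opted out (see the later hunk).
struct MaterializedMySQLToy : IStorageToy
{
    bool supportsTrivialCountOptimization() const override { return false; }
};

int main()
{
    std::vector<std::unique_ptr<IStorageToy>> storages;
    storages.push_back(std::make_unique<MergeTreeToy>());
    storages.push_back(std::make_unique<MaterializedMySQLToy>());
    for (const auto & s : storages)
        printf("%d\n", s->supportsTrivialCountOptimization());  /// 1, then 0
}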
@ -434,6 +434,8 @@ public:

    bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; }

    bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); }

    NamesAndTypesList getVirtuals() const override;

    bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override;
@ -60,7 +60,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
#include <Interpreters/InterpreterInsertQuery.h>
@ -75,6 +74,7 @@
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
#include <Interpreters/getHeaderForProcessingStage.h>

#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
@ -434,7 +434,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
    {
        /// Always calculate the optimized cluster here, to avoid conditions during read()
        /// (it will be calculated in read() anyway)
        ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query);
        ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info);
        if (optimized_cluster)
        {
            LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
@ -1297,7 +1297,7 @@ ClusterPtr StorageDistributed::getCluster() const
}

ClusterPtr StorageDistributed::getOptimizedCluster(
    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const
    ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const
{
    ClusterPtr cluster = getCluster();
    const Settings & settings = local_context->getSettingsRef();
@ -1306,7 +1306,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(

    if (has_sharding_key && sharding_key_is_usable)
    {
        ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context);
        ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context);
        if (optimized)
            return optimized;
    }
@ -1355,25 +1355,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c
/// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr`
ClusterPtr StorageDistributed::skipUnusedShards(
    ClusterPtr cluster,
    const ASTPtr & query_ptr,
    const SelectQueryInfo & query_info,
    const StorageSnapshotPtr & storage_snapshot,
    ContextPtr local_context) const
{
    const auto & select = query_ptr->as<ASTSelectQuery &>();

    const auto & select = query_info.query->as<ASTSelectQuery &>();
    if (!select.prewhere() && !select.where())
    {
        return nullptr;
    }

    /// FIXME: support analyzer
    if (!query_info.syntax_analyzer_result)
        return nullptr;

    ASTPtr condition_ast;
    if (select.prewhere() && select.where())
    /// Remove JOIN from the query since it may contain a condition for other tables.
    /// But only the conditions for the left table should be analyzed for shard skipping.
    {
        condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone());
    }
    else
    {
        condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone();
        ASTPtr select_without_join_ptr = select.clone();
        ASTSelectQuery select_without_join = select_without_join_ptr->as<ASTSelectQuery &>();
        TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result;

        removeJoin(select_without_join, analyzer_result_without_join, local_context);
        if (!select_without_join.prewhere() && !select_without_join.where())
            return nullptr;

        if (select_without_join.prewhere() && select_without_join.where())
            condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone());
        else
            condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone();
    }

    replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot);
@ -1396,11 +1405,9 @@ ClusterPtr StorageDistributed::skipUnusedShards(
        return nullptr;
    }

    // Can't get definite answer if we can skip any shards
    // Can't get a definite answer if we can skip any shards
    if (!blocks)
    {
        return nullptr;
    }

    std::set<int> shards;

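The JOIN removal matters because a condition on the right-hand table cannot constrain the left table's sharding key; only the surviving left-table conditions are evaluated against the key. A toy illustration of the pruning step itself, assuming a sharding key of id % shard_count (not ClickHouse code):

#include <cstdio>
#include <set>
#include <vector>

int main()
{
    const size_t shard_count = 4;
    /// Constants extracted from e.g. "WHERE id IN (3, 7, 11)"
    const std::vector<long> condition_values = {3, 7, 11};

    std::set<size_t> shards;
    for (long id : condition_values)
        shards.insert(static_cast<size_t>(id % shard_count));

    /// Only these shards receive the query; the rest are skipped.
    for (size_t s : shards)
        printf("shard %zu\n", s);   /// shard 3 only: 3, 7 and 11 all map to it
}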
@ -182,10 +182,10 @@ private:
    /// Apply the following settings:
    /// - optimize_skip_unused_shards
    /// - force_optimize_skip_unused_shards
    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const;
    ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const;

    ClusterPtr skipUnusedShards(
        ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;
        ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const;

    /// This method returns optimal query processing stage.
    ///
@ -41,6 +41,8 @@ public:

    void drop() override { nested_storage->drop(); }

    bool supportsTrivialCountOptimization() const override { return false; }

private:
    [[noreturn]] static void throwNotAllowed()
    {
@ -19,6 +19,7 @@
#include <Processors/Sinks/SinkToStorage.h>
#include <QueryPipeline/Pipe.h>
#include <Common/parseRemoteDescription.h>
#include <Common/quoteString.h>
#include <Common/logger_useful.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Databases/MySQL/FetchTablesColumnsList.h>
@ -34,16 +35,6 @@ namespace ErrorCodes
    extern const int UNKNOWN_TABLE;
}

static String backQuoteMySQL(const String & x)
{
    String res(x.size(), '\0');
    {
        WriteBufferFromString wb(res);
        writeBackQuotedStringMySQL(x, wb);
    }
    return res;
}

StorageMySQL::StorageMySQL(
    const StorageID & table_id_,
    mysqlxx::PoolWithFailover && pool_,
@ -4902,67 +4902,102 @@ void StorageReplicatedMergeTree::read(
        snapshot_data.alter_conversions = {};
    });

    /** The `select_sequential_consistency` setting has two meanings:
     * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
     * 2. Do not read parts that have not yet been written to the quorum of the replicas.
     * For this you have to synchronously go to ZooKeeper.
     */
    if (local_context->getSettingsRef().select_sequential_consistency)
    {
        auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
        if (auto plan = reader.read(
                column_names, storage_snapshot, query_info, local_context,
                max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/ false))
            query_plan = std::move(*plan);
        return;
    }
    const auto & settings = local_context->getSettingsRef();

    /// The `select_sequential_consistency` setting has two meanings:
    /// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas.
    /// 2. Do not read parts that have not yet been written to the quorum of the replicas.
    /// For this you have to synchronously go to ZooKeeper.
    if (settings.select_sequential_consistency)
        return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);

    if (local_context->canUseParallelReplicasOnInitiator())
        return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);

    readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams);
}

void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl(
    QueryPlan & query_plan,
    const Names & column_names,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & query_info,
    ContextPtr local_context,
    QueryProcessingStage::Enum processed_stage,
    size_t max_block_size,
    size_t num_streams)
{
    auto max_added_blocks = std::make_shared<ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock>(getMaxAddedBlocks());
    auto plan = reader.read(column_names, storage_snapshot, query_info, local_context,
        max_block_size, num_streams, processed_stage, std::move(max_added_blocks),
        /* enable_parallel_reading= */ false);
    if (plan)
        query_plan = std::move(*plan);
}

void StorageReplicatedMergeTree::readParallelReplicasImpl(
    QueryPlan & query_plan,
    const Names & /*column_names*/,
    const StorageSnapshotPtr & storage_snapshot,
    SelectQueryInfo & query_info,
    ContextPtr local_context,
    QueryProcessingStage::Enum processed_stage,
    const size_t /*max_block_size*/,
    const size_t /*num_streams*/)
{
    auto table_id = getStorageID();

    auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);

    ASTPtr modified_query_ast;
    Block header;
    if (local_context->getSettingsRef().allow_experimental_analyzer)
    {
        auto table_id = getStorageID();
        auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);

        ASTPtr modified_query_ast;

        Block header;

        if (local_context->getSettingsRef().allow_experimental_analyzer)
        {
            auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree);

            header = InterpreterSelectQueryAnalyzer::getSampleBlock(
                modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
            modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
        }
        else
        {
            modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
                table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/ nullptr);
            header
                = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
        }

        auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas);

        ClusterProxy::SelectStreamFactory select_stream_factory =
            ClusterProxy::SelectStreamFactory(
                header,
                {},
                storage_snapshot,
                processed_stage);

        ClusterProxy::executeQueryWithParallelReplicas(
            query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr,
            select_stream_factory, modified_query_ast,
            local_context, query_info, cluster);
        header = InterpreterSelectQueryAnalyzer::getSampleBlock(
            modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze());
        modified_query_ast = queryNodeToSelectQuery(modified_query_tree);
    }
    else
    {
        if (auto plan = reader.read(
                column_names, storage_snapshot, query_info,
                local_context, max_block_size, num_streams,
                processed_stage, nullptr, /*enable_parallel_reading*/ local_context->canUseParallelReplicasOnFollower()))
            query_plan = std::move(*plan);
        modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query,
            table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/ nullptr);
        header
            = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
|
||||
}
|
||||
|
||||
ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory(
|
||||
header,
|
||||
{},
|
||||
storage_snapshot,
|
||||
processed_stage);
|
||||
|
||||
ClusterProxy::executeQueryWithParallelReplicas(
|
||||
query_plan, getStorageID(),
|
||||
/* table_func_ptr= */ nullptr,
|
||||
select_stream_factory, modified_query_ast,
|
||||
local_context, query_info, parallel_replicas_cluster);
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::readLocalImpl(
|
||||
QueryPlan & query_plan,
|
||||
const Names & column_names,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
SelectQueryInfo & query_info,
|
||||
ContextPtr local_context,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
const size_t max_block_size,
|
||||
const size_t num_streams)
|
||||
{
|
||||
auto plan = reader.read(
|
||||
column_names, storage_snapshot, query_info,
|
||||
local_context, max_block_size, num_streams,
|
||||
processed_stage,
|
||||
/* max_block_numbers_to_read= */ nullptr,
|
||||
/* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower());
|
||||
if (plan)
|
||||
query_plan = std::move(*plan);
|
||||
}
|
||||
|
||||
template <class Func>
|
||||
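The hunk above splits StorageReplicatedMergeTree::read() into three dedicated implementations. As a reading aid, here is an abridged sketch of the new dispatch (parameter lists elided; a simplification, not the verbatim ClickHouse code):

    void StorageReplicatedMergeTree::read(/* query_plan, column_names, ..., num_streams */)
    {
        /// Quorum-aware local read: synchronously consults ZooKeeper for the max added blocks.
        if (settings.select_sequential_consistency)
            return readLocalSequentialConsistencyImpl(/* ... */);

        /// This replica acts as the initiator and fans the query out over the parallel-replicas cluster.
        if (local_context->canUseParallelReplicasOnInitiator())
            return readParallelReplicasImpl(/* ... */);

        /// Ordinary local read; may still participate as a follower in parallel reading.
        readLocalImpl(/* ... */);
    }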

@ -130,7 +130,7 @@ public:
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        ContextPtr local_context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams) override;
@ -513,6 +513,36 @@ private:

    static std::optional<QueryPipeline> distributedWriteFromClusterStorage(const std::shared_ptr<IStorageCluster> & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context);

    void readLocalImpl(
        QueryPlan & query_plan,
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr local_context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams);

    void readLocalSequentialConsistencyImpl(
        QueryPlan & query_plan,
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr local_context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams);

    void readParallelReplicasImpl(
        QueryPlan & query_plan,
        const Names & column_names,
        const StorageSnapshotPtr & storage_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr local_context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        size_t num_streams);

    template <class Func>
    void foreachActiveParts(Func && func, bool select_sequential_consistency) const;


@ -30,7 +30,6 @@ endif()
add_dependencies(generate-source generate-contributors)

set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp")
set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp")

add_custom_command(
    OUTPUT StorageSystemLicenses.generated.cpp
@ -38,23 +37,13 @@ add_custom_command(
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})

list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC})
list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC})

# Overlength strings
set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w)

include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
clickhouse_embed_binaries(
    TARGET information_schema_metadata
    RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/"
    RESOURCES schemata.sql tables.sql views.sql columns.sql
)

list (SORT storages_system_sources) # Reproducible build
add_library(clickhouse_storages_system ${storages_system_sources})

add_dependencies(clickhouse_storages_system information_schema_metadata)

target_link_libraries(clickhouse_storages_system PRIVATE
    dbms
    common
@ -62,5 +51,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE
    clickhouse_common_zookeeper
    clickhouse_parsers
    Poco::JSON
    INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:information_schema_metadata> -Wl,${NO_WHOLE_ARCHIVE}"
)

target_include_directories(clickhouse_storages_system PRIVATE InformationSchema)

@ -3,14 +3,23 @@
#include <Storages/System/attachSystemTablesImpl.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Common/getResource.h>
#include <incbin.h>

#include "config.h"

/// Embedded SQL definitions
INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");
INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql");
INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql");
INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql");


namespace DB
{

/// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt

static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name)
static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query)
{
    try
    {
@ -21,12 +30,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d
        bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE;

        String metadata_resource_name = view_name + ".sql";
        auto attach_query = getResource(metadata_resource_name);
        if (attach_query.empty())
        if (query.empty())
            return;

        ParserCreateQuery parser;
        ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(),
        ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(),
            "Attach query from embedded resource " + metadata_resource_name,
            DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);

@ -50,10 +58,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d

void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database)
{
    createInformationSchemaView(context, information_schema_database, "schemata");
    createInformationSchemaView(context, information_schema_database, "tables");
    createInformationSchemaView(context, information_schema_database, "views");
    createInformationSchemaView(context, information_schema_database, "columns");
    createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast<const char *>(gresource_schemata_sqlData), gresource_schemata_sqlSize));
    createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast<const char *>(gresource_tables_sqlData), gresource_tables_sqlSize));
    createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast<const char *>(gresource_views_sqlData), gresource_views_sqlSize));
    createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast<const char *>(gresource_columns_sqlData), gresource_columns_sqlSize));
}

}
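The pattern adopted above: INCBIN(name, path) embeds the file contents into the binary at build time and, as the call sites show, exposes g<name>Data / g<name>Size symbols that are then wrapped in a std::string_view. A minimal self-contained sketch of the same pattern (the file name and helper below are illustrative only):

    #include <incbin.h>
    #include <string_view>

    /// Embeds the file at compile time; incbin emits gresource_example_sqlData,
    /// gresource_example_sqlSize (and gresource_example_sqlEnd).
    INCBIN(resource_example_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");

    static std::string_view exampleSql()
    {
        return {reinterpret_cast<const char *>(gresource_example_sqlData), gresource_example_sqlSize};
    }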

@ -2,7 +2,6 @@
#include <Interpreters/Cluster.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClusterProxy/executeQuery.h>
#include <Interpreters/InterpreterDescribeQuery.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeString.h>
@ -58,7 +57,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(
    }

    ColumnsDescription res;
    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id);
    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id);

    /// Ignore limit for result number of rows (that could be set during handling CSE/CTE),
    /// since this is a service query and should not lead to query failure.
@ -177,7 +176,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables(
    const auto & shards_info = cluster.getShardsInfo();
    auto query = "DESC TABLE " + remote_table_id.getFullTableName();

    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id);
    auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id);
    new_context->setSetting("describe_extend_object_types", true);

    /// Expect only needed columns from the result of DESC TABLE.
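Both call sites show the same signature change: ClusterProxy::updateSettingsForCluster() no longer receives the Cluster object itself, only a flag derived from it. A hedged sketch of the new call shape (the local variable name below is assumed for illustration, not taken from the diff):

    /// The helper only needs to know whether the cluster authenticates replicas
    /// with an interserver secret, not the whole Cluster.
    bool interserver_mode = !cluster.getSecret().empty();
    auto new_context = ClusterProxy::updateSettingsForCluster(
        interserver_mode, context, context->getSettingsRef(), table_id);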

@ -264,7 +264,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
        secure,
        /* priority= */ Priority{1},
        /* cluster_name= */ "",
        /* password= */ ""
        /* cluster_secret= */ ""
    };
    cluster = std::make_shared<Cluster>(context->getSettingsRef(), names, params);
}

@ -162,3 +162,5 @@ endif ()
if (TARGET ch_contrib::fiu)
    set(FIU_ENABLE 1)
endif()

set(SOURCE_DIR ${CMAKE_SOURCE_DIR})
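The new set(SOURCE_DIR ${CMAKE_SOURCE_DIR}) feeds the INCBIN call sites shown earlier. Assuming the value is forwarded to the compiler as a string-literal definition (e.g. -DSOURCE_DIR="..."), the preprocessor concatenates it with the relative path:

    /// Adjacent string literals concatenate into one absolute path at compile time
    /// (SOURCE_DIR is assumed to arrive as a quoted macro from the build system).
    INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql");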

@ -130,4 +130,6 @@
02581_share_big_sets_between_mutation_tasks_long
02581_share_big_sets_between_multiple_mutations_tasks_long
00992_system_parts_race_condition_zookeeper_long
02790_optimize_skip_unused_shards_join
01940_custom_tld_sharding_key
02815_range_dict_no_direct_join

@ -529,6 +529,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val):
    return gen


# To keep dependency list as short as possible, tzdata is not used here (to
# avoid try/except block for import)
def get_localzone():
    return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:]))


class SettingsRandomizer:
    settings = {
        "max_insert_threads": lambda: 0
@ -602,20 +608,33 @@ class SettingsRandomizer:
        "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint(
            0, 1
        ),
        "session_timezone": lambda: random.choice(
            [
                # special non-deterministic around 1970 timezone, see [1].
                #
                # [1]: https://github.com/ClickHouse/ClickHouse/issues/42653
                "America/Mazatlan",
                "America/Hermosillo",
                "Mexico/BajaSur",
                # server default that is randomized across all timezones
                # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same.
                get_localzone(),
            ]
        ),
    }

    @staticmethod
    def get_random_settings(args):
        random_settings = []
        random_settings = {}
        is_debug = BuildFlags.DEBUG in args.build_flags
        for setting, generator in SettingsRandomizer.settings.items():
            if (
                is_debug
                and setting == "allow_prefetched_read_pool_for_remote_filesystem"
            ):
                random_settings.append(f"{setting}=0")
                random_settings[setting] = 0
            else:
                random_settings.append(f"{setting}={generator()}")
                random_settings[setting] = generator()
        return random_settings


@ -651,10 +670,10 @@ class MergeTreeSettingsRandomizer:

    @staticmethod
    def get_random_settings(args):
        random_settings = []
        random_settings = {}
        for setting, generator in MergeTreeSettingsRandomizer.settings.items():
            if setting not in args.changed_merge_tree_settings:
                random_settings.append(f"{setting}={generator()}")
                random_settings[setting] = generator()
        return random_settings


@ -766,7 +785,14 @@ class TestCase:

    @staticmethod
    def cli_format_settings(settings_list) -> str:
        return " ".join([f"--{setting}" for setting in settings_list])
        out = []
        for k, v in settings_list.items():
            out.extend([f"--{k}", str(v)])
        return " ".join(out)

    @staticmethod
    def http_format_settings(settings_list) -> str:
        return urllib.parse.urlencode(settings_list)

    def has_show_create_table_in_test(self):
        return not subprocess.call(["grep", "-iq", "show create", self.case_file])
@ -774,11 +800,12 @@ class TestCase:
    def add_random_settings(self, client_options):
        new_options = ""
        if self.randomize_settings:
            http_params = self.http_format_settings(self.random_settings)
            if len(self.base_url_params) == 0:
                os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings)
                os.environ["CLICKHOUSE_URL_PARAMS"] = http_params
            else:
                os.environ["CLICKHOUSE_URL_PARAMS"] = (
                    self.base_url_params + "&" + "&".join(self.random_settings)
                    self.base_url_params + "&" + http_params
                )

            new_options += f" {self.cli_format_settings(self.random_settings)}"
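With random_settings now a dict, the same settings are rendered two ways: "--key value" pairs for clickhouse-client and a urlencoded query string for the HTTP interface. A C++ analogue of the two renderings (illustration only; the harness itself is Python, and urllib.parse.urlencode additionally percent-escapes values):

    #include <map>
    #include <string>

    int main()
    {
        std::map<std::string, std::string> settings{{"max_insert_threads", "0"}, {"session_timezone", "UTC"}};
        std::string cli, url;
        for (const auto & [k, v] : settings)
        {
            cli += " --" + k + " " + v;                      // clickhouse-client flags
            url += (url.empty() ? "" : "&") + k + "=" + v;   // URL query string
        }
        return 0;
    }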

@ -3199,6 +3199,7 @@ class ClickHouseInstance:
    ):
        self.name = name
        self.base_cmd = cluster.base_cmd
        self.base_dir = base_path
        self.docker_id = cluster.get_instance_docker_id(self.name)
        self.cluster = cluster
        self.hostname = hostname if hostname is not None else self.name
@ -4193,6 +4194,14 @@ class ClickHouseInstance:
            ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]
        )

    def put_users_config(self, config_path):
        """Put a new users config (useful if you cannot put it at the start)"""

        instance_config_dir = p.abspath(p.join(self.path, "configs"))
        users_d_dir = p.abspath(p.join(instance_config_dir, "users.d"))
        config_path = p.join(self.base_dir, config_path)
        shutil.copy(config_path, users_d_dir)

    def create_dir(self):
        """Create the instance directory and all the needed files there."""


@ -1581,6 +1581,128 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name):
    mysql_node.query("DROP DATABASE utf8mb4_test")


def utf8mb4_column_test(clickhouse_node, mysql_node, service_name):
    db = "utf8mb4_column_test"
    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
    mysql_node.query(f"CREATE DATABASE {db}")

    # Full sync
    mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)")
    mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)")
    mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())")
    mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())")
    clickhouse_node.query(
        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
    )

    # Full sync replicates unquoted column names as well, since it uses SHOW CREATE TABLE,
    # which returns quoted column names
    check_query(
        clickhouse_node,
        f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}",
        "quoted\nunquoted\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted",
        "1\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted",
        "1\n",
    )

    # Inc sync
    mysql_node.query(
        f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)"
    )
    mysql_node.query(
        f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)"
    )
    mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())")
    mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())")
    mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())")
    mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())")
    check_query(
        clickhouse_node,
        f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted",
        "2\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new",
        "1\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted",
        "2\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new",
        "1\n",
    )

    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")


def utf8mb4_name_test(clickhouse_node, mysql_node, service_name):
    db = "您Hi您"
    table = "日期"
    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")
    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
    mysql_node.query(f"CREATE DATABASE `{db}`")
    mysql_node.query(
        f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
    )
    mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())")
    mysql_node.query(
        f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
    )
    mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())")
    clickhouse_node.query(
        f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`",
        "1\n",
    )
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`",
        "1\n",
    )

    # Inc sync
    mysql_node.query(
        f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
    )
    mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())")
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`",
        "1\n",
    )

    mysql_node.query(
        f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4"
    )
    mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())")
    check_query(
        clickhouse_node,
        f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`",
        "1\n",
    )

    clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`")
    mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`")


def system_parts_test(clickhouse_node, mysql_node, service_name):
    mysql_node.query("DROP DATABASE IF EXISTS system_parts_test")
    clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test")
@ -1701,6 +1823,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_
    mysql_node.query("DROP DATABASE materialized_with_column_comments_test")


def double_quoted_comment(clickhouse_node, mysql_node, service_name):
    db = "comment_db"
    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")
    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
    mysql_node.query(f"CREATE DATABASE {db}")
    mysql_node.query(
        f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
    )
    mysql_node.query(
        f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
    )
    clickhouse_node.query(
        f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')"
    )
    check_query(
        clickhouse_node,
        f"SHOW TABLES FROM {db} FORMAT TSV",
        "t1\nt2\n",
    )

    # incremental
    mysql_node.query(
        f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")'
    )
    mysql_node.query(
        f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')"
    )
    check_query(
        clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n"
    )

    clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}")
    mysql_node.query(f"DROP DATABASE IF EXISTS {db}")


def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name):
    mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")
    clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test")

@ -381,6 +381,12 @@ def test_utf8mb4(
):
    materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57")
    materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80")
    materialized_with_ddl.utf8mb4_column_test(
        clickhouse_node, started_mysql_8_0, "mysql80"
    )
    materialized_with_ddl.utf8mb4_name_test(
        clickhouse_node, started_mysql_8_0, "mysql80"
    )


def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node):
@ -422,6 +428,12 @@ def test_materialized_with_column_comments(
    )


def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node):
    materialized_with_ddl.double_quoted_comment(
        clickhouse_node, started_mysql_8_0, "mysql80"
    )


def test_materialized_with_enum(
    started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node
):

@ -114,7 +114,10 @@ def node_update_config(mode, setting, value=None):


def assert_took(took, should_took):
    assert took >= should_took[0] * 0.9 and took < should_took[1]
    # we need to decrease the lower limit because the server limits could
    # be enforced by throttling some server background IO instead of query IO
    # and we have no control over it
    assert took >= should_took[0] * 0.85 and took < should_took[1]


@pytest.mark.parametrize(

@ -0,0 +1,7 @@
<clickhouse>
    <profiles>
        <default>
            <force_remove_data_recursively_on_drop>1</force_remove_data_recursively_on_drop>
        </default>
    </profiles>
</clickhouse>

@ -51,6 +51,12 @@ def start_cluster():
        cluster.shutdown()


def restart_node(node):
    # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old)
    node.put_users_config("configs/force_remove_data_recursively_on_drop.xml")
    node.restart_with_latest_version(signal=9, fix_metadata=True)


def test_mutate_and_upgrade(start_cluster):
    for node in [node1, node2]:
        node.query("DROP TABLE IF EXISTS mt")
@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster):

    node2.query("DETACH TABLE mt")  # stop being leader
    node1.query("DETACH TABLE mt")  # stop being leader
    node1.restart_with_latest_version(signal=9, fix_metadata=True)
    node2.restart_with_latest_version(signal=9, fix_metadata=True)

    restart_node(node1)
    restart_node(node2)

    # After hard restart table can be in readonly mode
    exec_query_with_retry(
@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster):
    # (We could be in process of creating some system table, which will leave empty directory on restart,
    # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files)
    node3.query("SYSTEM FLUSH LOGS")
    node3.restart_with_latest_version(signal=9, fix_metadata=True)
    restart_node(node3)

    # checks for readonly
    exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60)

@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"
# NOTE: session_timezone overrides use_client_time_zone, so disable its randomization
env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)"

@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
function perform()
{
    local query=$1
    TZ=UTC $CLICKHOUSE_CLIENT \
        --allow_deprecated_syntax_for_merge_tree=1 \
        --use_client_time_zone=1 \
        --input_format_values_interpret_expressions=0 \
        --query "$query" 2>/dev/null
    local settings=(
        --allow_deprecated_syntax_for_merge_tree 1
        --session_timezone UTC
        --input_format_values_interpret_expressions 0
    )
    TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null
    if [ "$?" -ne 0 ]; then
        echo "query failed"
    fi

@ -1,3 +1,15 @@
-- disable timezone randomization since otherwise TTL may fail at a particular datetime, e.g.:
--
-- SELECT
--     now(),
--     toDate(toTimeZone(now(), 'America/Mazatlan')),
--     today()
--
-- ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐
-- │ 2023-07-24 06:24:06 │                                    2023-07-23 │ 2023-07-24 │
-- └─────────────────────┴───────────────────────────────────────────────┴────────────┘
set session_timezone = '';

drop table if exists ttl_00933_1;

-- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts

@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

set -e -o pipefail

# Run the client.
$CLICKHOUSE_CLIENT --multiquery <<'EOF'
# NOTE: dictionary TTLs work with the server timezone, so session_timezone cannot be used
$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF'
DROP DATABASE IF EXISTS dictdb_01042;
CREATE DATABASE dictdb_01042;
CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple();

@ -2,6 +2,9 @@

set mutations_sync = 2;

-- system.parts uses the server default timezone, so timezone cannot be randomized
set session_timezone = '';

drop table if exists ttl;

create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d)

@ -238,10 +238,6 @@ defaultValueOfArgumentType
defaultValueOfTypeName
degrees
demangle
detectCharset
detectLanguageUnknown
detectProgrammingLanguage
detectTonality
divide
dotProduct
dumpColumnStructure

@ -15,5 +15,7 @@ AND name NOT IN (
    'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3',
    'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo',
    'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD',
    'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test
    'lemmatize', 'tokenize', 'stem', 'synonyms',
    'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality'
    -- these functions are not enabled in fast test
) ORDER BY name;

@ -5,7 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "
# NOTE: dictionaries will be updated according to the server TZ, not the session one, so prohibit its randomization
$CLICKHOUSE_CLIENT --session_timezone '' -q "
    CREATE TABLE table_for_update_field_dictionary
    (
        key UInt64,

@ -1,21 +0,0 @@
2.00000000000000000000000000000000000000000000000000000000000000000000000000000
2.12
-2.00000000000000000000000000000000000000000000000000000000000000000000000000000
-2.12
2.987600000000000033395508580724708735942840576171875000000000
2.15
-2.987600000000000033395508580724708735942840576171875000000000
-2.15
64.1230010986
64.2340000000
-64.1230010986
-64.2340000000
-32.345
32.34500000000000000000000000000000000000000000000000000000000000000000000000000
32.46
-64.5671232345
128.78932312332132985464
-128.78932312332132985464
128.78932312332132985464000000000000000000000000000000000000000000000000000000000
128.7893231233
-128.78932312332132985464123123789323123321329854600000000000000000000000000000000

@ -1,35 +0,0 @@
-- Regular types
SELECT toDecimalString(2, 77); -- more digits required than exist
SELECT toDecimalString(2.123456, 2); -- rounding
SELECT toDecimalString(-2, 77); -- more digits required than exist
SELECT toDecimalString(-2.123456, 2); -- rounding

SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default)
SELECT toDecimalString(2.1456, 2); -- rounding
SELECT toDecimalString(-2.9876, 60); -- more digits required than exist
SELECT toDecimalString(-2.1456, 2); -- rounding

-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway.
SELECT toDecimalString(64.123::Float32, 10);
SELECT toDecimalString(64.234::Float64, 10);
SELECT toDecimalString(-64.123::Float32, 10);
SELECT toDecimalString(-64.234::Float64, 10);

-- Decimals
SELECT toDecimalString(-32.345::Decimal32(3), 3);
SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist
SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding
SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10);
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20);
SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding
SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist
SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist

-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float.
-- These values shall work OK.
SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}
SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER}

@ -0,0 +1,10 @@
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""
"id","Nullable(Int64)","","","","",""
"age","LowCardinality(UInt8)","","","","",""
"name","Nullable(String)","","","","",""
"status","Nullable(String)","","","","",""
"hobbies","Array(Nullable(String))","","","","",""

@ -0,0 +1,3 @@
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 FORMAT CSV;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') FORMAT CSV SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1 SETTINGS max_threads=0; -- { clientError SYNTAX_ERROR }

@ -0,0 +1,55 @@
-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995

DROP TABLE IF EXISTS outer;
DROP TABLE IF EXISTS inner;

DROP TABLE IF EXISTS outer_distributed;
DROP TABLE IF EXISTS inner_distributed;

CREATE TABLE IF NOT EXISTS outer
(
    `id` UInt64,
    `organization_id` UInt64,
    `version` UInt64
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY organization_id % 8
ORDER BY (organization_id, id);

CREATE TABLE inner
(
    `id` UInt64,
    `outer_id` UInt64,
    `organization_id` UInt64,
    `version` UInt64,
    `date` Date
)
ENGINE = ReplacingMergeTree(version)
PARTITION BY toYYYYMM(date)
ORDER BY (organization_id, outer_id);

CREATE TABLE inner_distributed AS inner
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id));

CREATE TABLE outer_distributed AS outer
ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id));

SELECT
    sum(if(inner_distributed.id != 0, 1, 0)) AS total,
    inner_distributed.date AS date
FROM outer_distributed AS outer_distributed
FINAL
LEFT JOIN
(
    SELECT
        inner_distributed.outer_id AS outer_id,
        inner_distributed.id AS id,
        inner_distributed.date AS date
    FROM inner_distributed AS inner_distributed
    FINAL
    WHERE inner_distributed.organization_id = 15078
) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id
WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01'))
GROUP BY date
ORDER BY date DESC
SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1;

@ -0,0 +1,11 @@
[1mexplain[0m

(Expression)
ExpressionTransform
(Aggregating)
FinalizeAggregatedTransform
AggregatingInOrderTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeInOrder 0 → 1

@ -0,0 +1,18 @@
SET read_in_order_two_level_merge_threshold=1000000;

DROP TABLE IF EXISTS t;
CREATE TABLE t(a UInt64)
ENGINE = MergeTree
ORDER BY a;

INSERT INTO t SELECT * FROM numbers_mt(1e3);
OPTIMIZE TABLE t FINAL;

EXPLAIN PIPELINE
SELECT a
FROM t
GROUP BY a
FORMAT PrettySpace
SETTINGS optimize_aggregation_in_order = 1;

DROP TABLE t;