Merge remote-tracking branch 'origin/fix-progress-from-s3' into fix-progress-from-s3

kssenii 2022-12-12 12:06:41 +01:00
commit a0c620269b
234 changed files with 2556 additions and 834 deletions

View File

@ -16,6 +16,7 @@ Checks: '*,
-android-*,
-bugprone-assignment-in-if-condition,
-bugprone-branch-clone,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
@ -23,7 +24,6 @@ Checks: '*,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-unchecked-optional-access,
-bugprone-assignment-in-if-condition,
-cert-dcl16-c,
-cert-err58-cpp,
@ -34,7 +34,6 @@ Checks: '*,
-clang-analyzer-optin.performance.Padding,
-clang-analyzer-optin.portability.UnixAPI,
-clang-analyzer-security.insecureAPI.bzero,
-clang-analyzer-security.insecureAPI.strcpy,
@ -103,12 +102,13 @@ Checks: '*,
-openmp-*,
-misc-const-correctness,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-const-correctness,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
-modernize-pass-by-value,
-modernize-return-braced-init-list,
-modernize-use-auto,
@ -117,7 +117,6 @@ Checks: '*,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,
-modernize-macro-to-enum,
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
@ -135,17 +134,35 @@ Checks: '*,
-readability-magic-numbers,
-readability-named-parameter,
-readability-redundant-declaration,
-readability-simplify-boolean-expr,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
-readability-simplify-boolean-expr,
-zirkon-*,
-misc-*, # temporarily disabled due to being too slow
# also disable checks in other categories which are aliases of checks in misc-*:
# https://releases.llvm.org/15.0.0/tools/clang/tools/extra/docs/clang-tidy/checks/list.html
-cert-dcl54-cpp, # alias of misc-new-delete-overloads
-hicpp-new-delete-operators, # alias of misc-new-delete-overloads
-cert-fio38-c, # alias of misc-non-copyable-objects
-cert-dcl03-c, # alias of misc-static-assert
-hicpp-static-assert, # alias of misc-static-assert
-cert-err09-cpp, # alias of misc-throw-by-value-catch-by-reference
-cert-err61-cpp, # alias of misc-throw-by-value-catch-by-reference
-cppcoreguidelines-c-copy-assignment-signature, # alias of misc-unconventional-assign-operator
-cppcoreguidelines-non-private-member-variables-in-classes, # alias of misc-non-private-member-variables-in-classes
'
WarningsAsErrors: '*'
# TODO: use dictionary syntax for CheckOptions when the minimum clang-tidy version rises to 15
# some-check.SomeOption: 'some value'
# instead of
# - key: some-check.SomeOption
# value: 'some value'
CheckOptions:
- key: readability-identifier-naming.ClassCase
value: CamelCase

.gitmodules vendored
View File

@ -287,3 +287,6 @@
[submodule "contrib/xxHash"]
path = contrib/xxHash
url = https://github.com/Cyan4973/xxHash.git
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark.git

View File

@ -111,6 +111,7 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_BENCHMARKS 0)
# For codegen_select_fuzzer
set (ENABLE_PROTOBUF 1)
@ -168,6 +169,7 @@ endif ()
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
# Only for Linux, x86_64 or aarch64.

View File

@ -16,8 +16,6 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming events
* [**v22.11 Release Webinar**](https://clickhouse.com/company/events/v22-11-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share the vision for what is coming in the roadmap.
* [**ClickHouse Meetup at the RELEX Solutions office in Stockholm**](https://www.meetup.com/clickhouse-stockholm-user-group/events/289492084/) - Dec 1 - Formulate by RELEX is a Swedish promotion planning and analytics company. They will share why they chose ClickHouse for their real time analytics and forecasting solution. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management.
* [**ClickHouse Meetup at the Deutsche Bank office in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/289311596/) - Dec 5 - Hear from Deutsche Bank on why they chose ClickHouse for big sensitive data in a regulated environment. The ClickHouse team will then present how ClickHouse is used for real time financial data analytics, including tick data, trade analytics and risk management.
* [**ClickHouse Meetup at the Rokt offices in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/289403909/) - Dec 6 - We are very excited to be holding our next in-person ClickHouse meetup at the Rokt offices in Manhattan. Featuring talks from Bloomberg, Disney Streaming, Prequel, Rokt, and ClickHouse
* [**v22.12 Release Webinar**](https://clickhouse.com/company/events/v22-12-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share the vision for what is coming in the roadmap.
* [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!
* **ClickHouse Meetup in Seattle** - Keep an eye on this space as we will be announcing a January meetup in Seattle soon!

View File

@ -17,6 +17,7 @@
#include <filesystem>
#include <fmt/format.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <boost/algorithm/string/classification.hpp> /// is_any_of
namespace
@ -38,7 +39,7 @@ std::string getEditor()
return editor;
}
std::string getFuzzyFinder()
std::pair<std::string, FuzzyFinderType> getFuzzyFinder()
{
const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe)
@ -52,14 +53,20 @@ std::string getFuzzyFinder()
std::filesystem::path path(path_str);
std::filesystem::path sk_bin_path = path / "sk";
if (!access(sk_bin_path.c_str(), X_OK))
return sk_bin_path;
return {sk_bin_path, FUZZY_FINDER_SKIM};
std::filesystem::path fzf_bin_path = path / "fzf";
if (!access(fzf_bin_path.c_str(), X_OK))
return fzf_bin_path;
return {fzf_bin_path, FUZZY_FINDER_FZF};
}
return {};
return {"", FUZZY_FINDER_NONE};
}
String escapeShellArgument(std::string arg)
{
boost::replace_all(arg, "'", "'\\''");
return fmt::format("'{}'", arg);
}
/// See comments in ShellCommand::executeImpl()
@ -305,11 +312,12 @@ ReplxxLineReader::ReplxxLineReader(
replxx::Replxx::highlighter_callback_t highlighter_)
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
, editor(getEditor())
, fuzzy_finder(getFuzzyFinder())
{
using namespace std::placeholders;
using Replxx = replxx::Replxx;
std::tie(fuzzy_finder, fuzzy_finder_type) = getFuzzyFinder();
if (!history_file_path.empty())
{
history_file_fd = open(history_file_path.c_str(), O_RDWR);
@ -415,11 +423,12 @@ ReplxxLineReader::ReplxxLineReader(
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
/// interactive search in history (requires fzf/sk)
if (!fuzzy_finder.empty())
if (fuzzy_finder_type != FUZZY_FINDER_NONE)
{
auto interactive_history_search = [this](char32_t code)
{
openInteractiveHistorySearch();
rx.invoke(Replxx::ACTION::CLEAR_SELF, code);
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
@ -515,9 +524,22 @@ void ReplxxLineReader::openInteractiveHistorySearch()
///
/// Also note that fzf and skim are 95% compatible (at least for the options
/// that are used here)
std::string fuzzy_finder_command = fmt::format(
"{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}",
fuzzy_finder, history_file.getPath(), output_file.getPath());
std::string fuzzy_finder_command = fmt::format("{} --read0 --height=30%", fuzzy_finder);
switch (fuzzy_finder_type)
{
case FUZZY_FINDER_SKIM:
fuzzy_finder_command += " --tac --tiebreak=-score";
break;
case FUZZY_FINDER_FZF:
fuzzy_finder_command += " --tac --tiebreak=index";
break;
case FUZZY_FINDER_NONE:
/// assertion for !fuzzy_finder.empty() is enough
break;
}
fuzzy_finder_command += fmt::format(" < {} > {}",
escapeShellArgument(history_file.getPath()),
escapeShellArgument(output_file.getPath()));
char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr};
try

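As a side note, the escaping scheme used by `escapeShellArgument` above can be shown in isolation. This is a minimal standalone sketch (the helper name and the boost-free re-implementation are ours), demonstrating why a history path or query containing a single quote cannot break out of the string passed to `sh -c`:

```cpp
#include <iostream>
#include <string>

/// Same scheme as escapeShellArgument above, re-implemented without boost:
/// wrap the whole argument in single quotes and replace each embedded
/// single quote with '\'' (close the quote, emit an escaped quote, reopen).
std::string escapeShellArgumentSketch(std::string arg)
{
    std::string::size_type pos = 0;
    while ((pos = arg.find('\'', pos)) != std::string::npos)
    {
        arg.replace(pos, 1, "'\\''");
        pos += 4;
    }
    return "'" + arg + "'";
}

int main()
{
    std::cout << escapeShellArgumentSketch("SELECT 'hello'") << '\n';
    /// Prints: 'SELECT '\''hello'\'''
}
```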
View File

@ -4,6 +4,14 @@
#include <replxx.hxx>
enum FuzzyFinderType
{
FUZZY_FINDER_NONE,
/// Use https://github.com/junegunn/fzf
FUZZY_FINDER_FZF,
/// Use https://github.com/lotabout/skim
FUZZY_FINDER_SKIM,
};
class ReplxxLineReader : public LineReader
{
@ -38,4 +46,5 @@ private:
std::string editor;
std::string fuzzy_finder;
FuzzyFinderType fuzzy_finder_type = FUZZY_FINDER_NONE;
};

View File

@ -187,8 +187,20 @@ struct integer<Bits, Signed>::_impl
static_assert(Bits % base_bits == 0);
/// Simple iteration in both directions
static constexpr unsigned little(unsigned idx) { return idx; }
static constexpr unsigned big(unsigned idx) { return item_count - 1 - idx; }
static constexpr unsigned little(unsigned idx)
{
if constexpr (std::endian::native == std::endian::little)
return idx;
else
return item_count - 1 - idx;
}
static constexpr unsigned big(unsigned idx)
{
if constexpr (std::endian::native == std::endian::little)
return item_count - 1 - idx;
else
return idx;
}
static constexpr unsigned any(unsigned idx) { return idx; }
template <class T>
@ -240,20 +252,20 @@ struct integer<Bits, Signed>::_impl
{
static_assert(sizeof(Integral) <= sizeof(base_type));
self.items[0] = _impl::to_Integral(rhs);
self.items[little(0)] = _impl::to_Integral(rhs);
if constexpr (std::is_signed_v<Integral>)
{
if (rhs < 0)
{
for (size_t i = 1; i < item_count; ++i)
self.items[i] = -1;
for (unsigned i = 1; i < item_count; ++i)
self.items[little(i)] = -1;
return;
}
}
for (size_t i = 1; i < item_count; ++i)
self.items[i] = 0;
for (unsigned i = 1; i < item_count; ++i)
self.items[little(i)] = 0;
}
template <typename TupleLike, size_t i = 0>
@ -348,7 +360,7 @@ struct integer<Bits, Signed>::_impl
constexpr const unsigned to_copy = min_bits / base_bits;
for (unsigned i = 0; i < to_copy; ++i)
self.items[i] = rhs.items[i];
self.items[little(i)] = rhs.items[little(i)];
if constexpr (Bits > Bits2)
{
@ -357,13 +369,13 @@ struct integer<Bits, Signed>::_impl
if (rhs < 0)
{
for (unsigned i = to_copy; i < item_count; ++i)
self.items[i] = -1;
self.items[little(i)] = -1;
return;
}
}
for (unsigned i = to_copy; i < item_count; ++i)
self.items[i] = 0;
self.items[little(i)] = 0;
}
}
@ -454,7 +466,7 @@ private:
{
if constexpr (sizeof(T) <= sizeof(base_type))
{
if (0 == idx)
if (little(0) == idx)
return static_cast<base_type>(x);
}
else if (idx * sizeof(base_type) < sizeof(T))
@ -475,7 +487,7 @@ private:
for (unsigned i = 0; i < op_items; ++i)
{
base_type rhs_item = get_item(rhs, i);
base_type rhs_item = get_item(rhs, little(i));
base_type & res_item = res.items[little(i)];
underflows[i] = res_item < rhs_item;
@ -508,7 +520,7 @@ private:
for (unsigned i = 0; i < op_items; ++i)
{
base_type rhs_item = get_item(rhs, i);
base_type rhs_item = get_item(rhs, little(i));
base_type & res_item = res.items[little(i)];
res_item += rhs_item;
@ -580,12 +592,12 @@ private:
else if constexpr (Bits == 128 && sizeof(base_type) == 8)
{
using CompilerUInt128 = unsigned __int128;
CompilerUInt128 a = (CompilerUInt128(lhs.items[1]) << 64) + lhs.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 b = (CompilerUInt128(rhs.items[1]) << 64) + rhs.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 a = (CompilerUInt128(lhs.items[little(1)]) << 64) + lhs.items[little(0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 b = (CompilerUInt128(rhs.items[little(1)]) << 64) + rhs.items[little(0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 c = a * b;
integer<Bits, Signed> res;
res.items[0] = c;
res.items[1] = c >> 64;
res.items[little(0)] = c;
res.items[little(1)] = c >> 64;
return res;
}
else
@ -597,7 +609,7 @@ private:
#endif
for (unsigned i = 0; i < item_count; ++i)
{
base_type rhs_item = get_item(rhs, i);
base_type rhs_item = get_item(rhs, little(i));
unsigned pos = i * base_bits;
while (rhs_item)
@ -792,7 +804,7 @@ public:
integer<Bits, Signed> res;
for (unsigned i = 0; i < item_count; ++i)
res.items[little(i)] = lhs.items[little(i)] | get_item(rhs, i);
res.items[little(i)] = lhs.items[little(i)] | get_item(rhs, little(i));
return res;
}
else
@ -810,7 +822,7 @@ public:
integer<Bits, Signed> res;
for (unsigned i = 0; i < item_count; ++i)
res.items[little(i)] = lhs.items[little(i)] & get_item(rhs, i);
res.items[little(i)] = lhs.items[little(i)] & get_item(rhs, little(i));
return res;
}
else
@ -845,17 +857,17 @@ public:
{
using CompilerUInt128 = unsigned __int128;
CompilerUInt128 a = (CompilerUInt128(numerator.items[1]) << 64) + numerator.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 b = (CompilerUInt128(denominator.items[1]) << 64) + denominator.items[0]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 a = (CompilerUInt128(numerator.items[little(1)]) << 64) + numerator.items[little(0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 b = (CompilerUInt128(denominator.items[little(1)]) << 64) + denominator.items[little(0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
CompilerUInt128 c = a / b; // NOLINT
integer<Bits, Signed> res;
res.items[0] = c;
res.items[1] = c >> 64;
res.items[little(0)] = c;
res.items[little(1)] = c >> 64;
CompilerUInt128 remainder = a - b * c;
numerator.items[0] = remainder;
numerator.items[1] = remainder >> 64;
numerator.items[little(0)] = remainder;
numerator.items[little(1)] = remainder >> 64;
return res;
}
@ -1039,15 +1051,15 @@ constexpr integer<Bits, Signed>::integer(std::initializer_list<T> il) noexcept
else
{
auto it = il.begin();
for (size_t i = 0; i < _impl::item_count; ++i)
for (unsigned i = 0; i < _impl::item_count; ++i)
{
if (it < il.end())
{
items[i] = *it;
items[_impl::little(i)] = *it;
++it;
}
else
items[i] = 0;
items[_impl::little(i)] = 0;
}
}
}
@ -1208,7 +1220,7 @@ constexpr integer<Bits, Signed>::operator T() const noexcept
UnsignedT res{};
for (unsigned i = 0; i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type); ++i)
res += UnsignedT(items[i]) << (sizeof(base_type) * 8 * i); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
res += UnsignedT(items[_impl::little(i)]) << (sizeof(base_type) * 8 * i); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult)
return res;
}

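To make the endian-aware index mapping above concrete, here is a small standalone sketch (assuming C++20 and `item_count = 4`, i.e. a 256-bit integer stored as four 64-bit items; `little_index` mirrors the member function `little`):

```cpp
#include <bit>
#include <cstdio>

constexpr unsigned item_count = 4;

/// Storage index of the i-th least significant item, independent of host
/// byte order: on little-endian hosts items are stored least significant
/// first, on big-endian hosts the least significant item is stored last.
constexpr unsigned little_index(unsigned idx)
{
    if constexpr (std::endian::native == std::endian::little)
        return idx;
    else
        return item_count - 1 - idx;
}

int main()
{
    /// Prints "0 1 2 3" on a little-endian host and "3 2 1 0" on a
    /// big-endian one, so items[little_index(0)] is always the least
    /// significant 64 bits.
    for (unsigned i = 0; i < item_count; ++i)
        std::printf("%u ", little_index(i));
    std::printf("\n");
}
```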
View File

@ -5,21 +5,21 @@ if (ENABLE_CLANG_TIDY)
find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache")
if (CLANG_TIDY_CACHE_PATH)
find_program (_CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy")
# Why do we use ';' here?
# It's CMake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY
# The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax.
set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper")
else ()
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
find_program (CLANG_TIDY_PATH NAMES "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12" "clang-tidy")
endif ()
if (CLANG_TIDY_PATH)
message (STATUS
"Using clang-tidy: ${CLANG_TIDY_PATH}.
The checks will be run during build process.
See the .clang-tidy file at the root directory to configure the checks.")
The checks will be run during the build process.
See the .clang-tidy file in the root directory to configure the checks.")
set (USE_CLANG_TIDY ON)

View File

@ -21,12 +21,12 @@ set (APPLE_CLANG_MINIMUM_VERSION 12.0.0)
set (GCC_MINIMUM_VERSION 11)
if (COMPILER_GCC)
message (FATAL_ERROR "Compilation with GCC is unsupported. Please use Clang instead.")
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION})
message (FATAL_ERROR "Compilation with GCC version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${GCC_MINIMUM_VERSION}.")
endif ()
message (WARNING "Compilation with GCC is unsupported. Please use Clang instead.")
elseif (COMPILER_CLANG)
if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang")
# (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running the CMake configuration step if you want to experiment with using it.
@ -83,7 +83,7 @@ if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
if (NOT LINKER_NAME)
if (GOLD_PATH)
message (WARNING "Linking with gold is not recommended. Please use lld.")
message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
if (COMPILER_GCC)
set (LINKER_NAME "gold")
else ()

View File

@ -171,6 +171,8 @@ add_contrib (annoy-cmake annoy)
add_contrib (xxHash-cmake xxHash)
add_contrib (google-benchmark-cmake google-benchmark)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some third-party projects may override the CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned; we work around this by fixing the FOLDER properties of all targets manually,

contrib/google-benchmark vendored Submodule

@ -0,0 +1 @@
Subproject commit 2257fa4d6afb8e5a2ccd510a70f38fe7fcdf1edf

View File

@ -0,0 +1,34 @@
set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-benchmark/src")
set (SRCS
"${SRC_DIR}/benchmark.cc"
"${SRC_DIR}/benchmark_api_internal.cc"
"${SRC_DIR}/benchmark_name.cc"
"${SRC_DIR}/benchmark_register.cc"
"${SRC_DIR}/benchmark_runner.cc"
"${SRC_DIR}/check.cc"
"${SRC_DIR}/colorprint.cc"
"${SRC_DIR}/commandlineflags.cc"
"${SRC_DIR}/complexity.cc"
"${SRC_DIR}/console_reporter.cc"
"${SRC_DIR}/counter.cc"
"${SRC_DIR}/csv_reporter.cc"
"${SRC_DIR}/json_reporter.cc"
"${SRC_DIR}/perf_counters.cc"
"${SRC_DIR}/reporter.cc"
"${SRC_DIR}/sleep.cc"
"${SRC_DIR}/statistics.cc"
"${SRC_DIR}/string_util.cc"
"${SRC_DIR}/sysinfo.cc"
"${SRC_DIR}/timers.cc")
add_library(google_benchmark "${SRCS}")
target_include_directories(google_benchmark SYSTEM PUBLIC "${SRC_DIR}/../include")
add_library(google_benchmark_main "${SRC_DIR}/benchmark_main.cc")
target_link_libraries(google_benchmark_main PUBLIC google_benchmark)
add_library(google_benchmark_all INTERFACE)
target_link_libraries(google_benchmark_all INTERFACE google_benchmark google_benchmark_main)
add_library(ch_contrib::gbenchmark_all ALIAS google_benchmark_all)

contrib/qpl vendored

@ -1 +1 @@
Subproject commit cdc8442f7a5e7a6ff6eea39c69665e0c5034d85d
Subproject commit becb7a1b15bdb4845ec3721a550707ffa51d029d

View File

@ -15,7 +15,7 @@ set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (EFFICIENT_WAIT ON)
set (EFFICIENT_WAIT OFF)
set (BLOCK_ON_FAULT ON)
set (LOG_HW_INIT OFF)
set (SANITIZE_MEMORY OFF)
@ -42,7 +42,7 @@ include("${QPL_PROJECT_DIR}/cmake/CompileOptions.cmake")
include(CheckLanguage)
check_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER)
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!")
endif()
# [SUBDIR]isal
@ -110,18 +110,18 @@ target_compile_options(isal PRIVATE
"$<$<CONFIG:Debug>:>"
"$<$<CONFIG:Release>:>")
target_compile_options(isal_asm PUBLIC "-I${QPL_SRC_DIR}/isal/include/"
PUBLIC "-I${QPL_SRC_DIR}/isal/igzip/"
PUBLIC "-I${QPL_SRC_DIR}/isal/crc/"
PUBLIC "-DQPL_LIB")
target_compile_options(isal_asm PRIVATE "-I${QPL_SRC_DIR}/isal/include/"
PRIVATE "-I${QPL_SRC_DIR}/isal/igzip/"
PRIVATE "-I${QPL_SRC_DIR}/isal/crc/"
PRIVATE "-DQPL_LIB")
# AS_FEATURE_LEVEL=10 means "Check SIMD capabilities of the target system at runtime and use up to AVX512 if available".
# AS_FEATURE_LEVEL=5 means "Check SIMD capabilities of the target system at runtime and use up to AVX2 if available".
# HAVE_KNOWS_AVX512 means rely on AVX512 being available on the target system.
if (ENABLE_AVX512)
target_compile_options(isal_asm PUBLIC "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
target_compile_options(isal_asm PRIVATE "-DHAVE_AS_KNOWS_AVX512" "-DAS_FEATURE_LEVEL=10")
else()
target_compile_options(isal_asm PUBLIC "-DAS_FEATURE_LEVEL=5")
target_compile_options(isal_asm PRIVATE "-DAS_FEATURE_LEVEL=5")
endif()
# We must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS here.
@ -315,7 +315,13 @@ target_compile_definitions(_qpl
PRIVATE -DQPL_BADARG_CHECK
PUBLIC -DENABLE_QPL_COMPRESSION)
find_library(LIBACCEL accel-config)
if(NOT LIBACCEL)
message(FATAL_ERROR "Please install QPL dependency library:libaccel-config from https://github.com/intel/idxd-config")
endif()
target_link_libraries(_qpl
PRIVATE ${LIBACCEL}
PRIVATE ${CMAKE_DL_LIBS})
add_library (ch_contrib::qpl ALIAS _qpl)

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="22.11.1.1360"
ARG VERSION="22.11.2.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="22.11.1.1360"
ARG VERSION="22.11.2.30"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.11.15-lts (65c9506d161) FIXME as compared to v22.8.10.29-lts (d568a57f7af)
#### Bug Fix
* Backported in [#43098](https://github.com/ClickHouse/ClickHouse/issues/43098): Updated the normaliser to clone the alias AST. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452). Implementation: * Updated QueryNormalizer to clone the alias AST when it is replaced. Previously, just assigning the same AST led to an exception in LogicalExpressionsOptimizer as it would be the same parent being inserted again. * This bug is not seen with the new analyzer (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#43751](https://github.com/ClickHouse/ClickHouse/issues/43751): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#43617](https://github.com/ClickHouse/ClickHouse/issues/43617): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43886](https://github.com/ClickHouse/ClickHouse/issues/43886): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -16,7 +16,7 @@ import SupersetDocker from '@site/docs/en/_snippets/_add_superset_detail.md';
## Goal
In this guide you will learn how to:
- Load the OpenCelliD data in Clickhouse
- Load the OpenCelliD data in ClickHouse
- Connect Apache Superset to ClickHouse
- Build a dashboard based on data available in the dataset
@ -275,7 +275,7 @@ Here is a description of the columns taken from the OpenCelliD forum:
To find your MCC check [Mobile network codes](https://en.wikipedia.org/wiki/Mobile_country_code), and use the three digits in the **Mobile country code** column.
:::
The schema for this table was designed for compact storage on disk and query speed.
The schema for this table was designed for compact storage on disk and query speed.
- The `radio` data is stored as an `Enum8` (`UInt8`) rather than a string.
- `mcc`, or Mobile country code, is stored as a `UInt16` as we know the range is 1 - 999.
- `lon` and `lat` are `Float64`.

View File

@ -56,6 +56,7 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe
- [Line by line commit history of a file](#line-by-line-commit-history-of-a-file)
- [Unsolved Questions](#unsolved-questions)
- [Git blame](#git-blame)
- [Related Content](#related-content)
# Generating the data
@ -2497,3 +2498,7 @@ LIMIT 20
We welcome exact and improved solutions here.
# Related Content
- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits)
- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)

View File

@ -22,5 +22,8 @@ functions in ClickHouse. The sample datasets include:
- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
- The [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset
- The [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) shows how JSON data can be loaded
- The [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from S3
View the **Tutorials and Datasets** menu for a complete list of sample datasets.
View the **Tutorials and Datasets** menu for a complete list of sample datasets.

View File

@ -8,8 +8,8 @@ slug: /en/install
You have two options for getting up and running with ClickHouse:
- **[ClickHouse Cloud](https://clickhouse.cloud/):** the official ClickHouse as a service, built, maintained, and supported by the creators of ClickHouse
- **Self-managed ClickHouse:** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, built, maintained, and supported by the creators of ClickHouse
- **[Self-managed ClickHouse](https://github.com/ClickHouse/ClickHouse):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
## ClickHouse Cloud
@ -406,4 +406,3 @@ SELECT 1
**Congratulations, the system works!**
To continue experimenting, you can download one of the test data sets or go through the [tutorial](/docs/en/tutorial.md).

View File

@ -244,7 +244,7 @@ The username and password can be indicated in one of three ways:
$ echo 'SELECT 1' | curl 'http://user:password@localhost:8123/' -d @-
```
1. In the user and password URL parameters. Example:
2. In the user and password URL parameters (*We do not recommend using this method, as the parameters might be logged by a web proxy and cached in the browser*). Example:
<!-- -->
@ -252,7 +252,7 @@ $ echo 'SELECT 1' | curl 'http://user:password@localhost:8123/' -d @-
$ echo 'SELECT 1' | curl 'http://localhost:8123/?user=user&password=password' -d @-
```
1. Using X-ClickHouse-User and X-ClickHouse-Key headers. Example:
3. Using X-ClickHouse-User and X-ClickHouse-Key headers. Example:
<!-- -->

View File

@ -1,5 +1,8 @@
---
slug: /en/operations/backup
---
[//]: # (This file is included in Manage > Backups)
# Backup and Restore
- [Backup to a local disk](#backup-to-a-local-disk)
- [Configuring backup/restore to use an S3 endpoint](#configuring-backuprestore-to-use-an-s3-endpoint)
@ -55,7 +58,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- SETTINGS:
- [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and compression_level
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
### Usage examples
@ -72,7 +75,7 @@ RESTORE TABLE test.table FROM Disk('backups', '1.zip')
:::note
The above RESTORE would fail if the table `test.table` contains data; you would have to drop the table in order to test the RESTORE, or use the setting `allow_non_empty_tables=true`:
```
RESTORE TABLE test.table FROM Disk('backups', '1.zip')
RESTORE TABLE test.table FROM Disk('backups', '1.zip')
SETTINGS allow_non_empty_tables=true
```
:::
@ -101,7 +104,7 @@ BACKUP TABLE test.table TO Disk('backups', 'incremental-a.zip')
Restore all data from the incremental backup and the base_backup into a new table `test.table2`:
```
RESTORE TABLE test.table AS test.table2
RESTORE TABLE test.table AS test.table2
FROM Disk('backups', 'incremental-a.zip');
```
@ -356,4 +359,3 @@ Data can be restored from backup using the `ALTER TABLE ... ATTACH PARTITION ...
For more information about queries related to partition manipulations, see the [ALTER documentation](../sql-reference/statements/alter/partition.md#alter_manipulations-with-partitions).
A third-party tool is available to automate this approach: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).

View File

@ -286,3 +286,7 @@ end script
If you use antivirus software, configure it to skip folders with ClickHouse data files (`/var/lib/clickhouse`); otherwise performance may be reduced and you may experience unexpected errors during data ingestion and background merges.
[Original article](https://clickhouse.com/docs/en/operations/tips/)
## Related Content
- [Getting started with ClickHouse? Here are 13 "Deadly Sins" and how to avoid them](https://clickhouse.com/blog/common-getting-started-issues-with-clickhouse)

View File

@ -1,5 +1,8 @@
---
slug: /en/operations/update
---
[//]: # (This file is included in Manage > Updates)
# Update
## Self-managed ClickHouse Upgrade

View File

@ -117,3 +117,8 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
```
[Original article](https://clickhouse.com/docs/en/operations/utils/clickhouse-local/) <!--hide-->
## Related Content
- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1)
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)

View File

@ -1,6 +1,7 @@
---
slug: /en/sql-reference/aggregate-functions/reference/exponentialmovingaverage
sidebar_position: 108
sidebar_title: exponentialMovingAverage
---
## exponentialMovingAverage

View File

@ -95,3 +95,6 @@ Result:
└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘
```
## Related Content
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)

View File

@ -75,3 +75,7 @@ SELECT * FROM json FORMAT JSONEachRow
```text
{"o":{"a":1,"b":{"c":2,"d":[1,2,3]}}}
```
## Related Content
- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json)

View File

@ -1,4 +1,4 @@
:::tip
If you are using a dictionary with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md).
If you are using a dictionary with ClickHouse Cloud please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/cloud/reference/cloud-compatibility.md).
:::

View File

@ -134,3 +134,7 @@ Result:
│ [[[(3,1),(0,1),(0,-1),(3,-1)]]] │ Value │
└─────────────────────────────────┴───────┘
```
## Related Content
- [Exploring massive, real-world data sets: 100+ Years of Weather Records in ClickHouse](https://clickhouse.com/blog/real-world-data-noaa-climate-data)

View File

@ -464,5 +464,39 @@ Removes the query string and fragment identifier. The question mark and number s
### cutURLParameter(URL, name)
Removes the name URL parameter, if present. This function works under the assumption that the parameter name is encoded in the URL exactly the same way as in the passed argument.
Removes the `name` parameter from the URL, if present. This function does not encode or decode characters in parameter names, e.g. `Client ID` and `Client%20ID` are treated as different parameter names.
**Syntax**
``` sql
cutURLParameter(URL, name)
```
**Arguments**
- `url` — URL. [String](../../sql-reference/data-types/string.md).
- `name` — name of the URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings.
**Returned value**
- URL with the `name` URL parameter removed.
Type: `String`.
**Example**
Query:
``` sql
SELECT
cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', 'a') as url_without_a,
cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', ['c', 'e']) as url_without_c_and_e;
```
Result:
``` text
┌─url_without_a────────────────┬─url_without_c_and_e──────┐
│ http://bigmir.net/?c=d&e=f#g │ http://bigmir.net/?a=b#g │
└──────────────────────────────┴──────────────────────────┘
```

View File

@ -162,7 +162,7 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W
This query creates a local backup of a specified partition. If the `PARTITION` clause is omitted, the query creates the backup of all partitions at once.
:::note
:::note
The entire backup process is performed without stopping the server.
:::
@ -172,9 +172,9 @@ At the time of execution, for a data snapshot, the query creates hardlinks to a
- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config.
- `N` is the incremental number of the backup.
- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number.
- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number.
:::note
:::note
If you use [a set of disks for data storage in a table](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression.
:::
@ -194,7 +194,7 @@ To restore data from a backup, do the following:
Restoring from a backup does not require stopping the server.
For more information about backups and restoring data, see the [Data Backup](/docs/en/manage/backups.mdx) section.
For more information about backups and restoring data, see the [Data Backup](/docs/en/operations/backup.md) section.
## UNFREEZE PARTITION

View File

@ -7,7 +7,7 @@ sidebar_label: UPDATE
# ALTER TABLE … UPDATE Statements
``` sql
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] [IN PARTITION partition_id] WHERE filter_expr
```
Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).

View File

@ -10,7 +10,7 @@ Creates [settings profiles](../../../operations/access-rights.md#settings-profil
Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```

View File

@ -587,3 +587,8 @@ ORDER BY
│ ambient_temp │ 2020-03-01 12:00:00 │ 16 │ 16 │
└──────────────┴─────────────────────┴───────┴─────────────────────────┘
```
## Related Content
- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits)
- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3)

View File

@ -404,5 +404,39 @@ SELECT netloc('http://paul@www.example.com:80/');
### cutURLParameter(URL, name) {#cuturlparameterurl-name}
Removes the URL parameter named `name`, if present. This function works under the assumption that the parameter name is encoded in the URL exactly the same way as in the passed argument.
Removes the `name` parameter from the URL, if present. This function does not encode or decode characters in parameter names, e.g. `Client ID` and `Client%20ID` are treated as different parameter names.
**Syntax**
``` sql
cutURLParameter(URL, name)
```
**Arguments**
- `url` — URL. [String](../../sql-reference/data-types/string.md).
- `name` — name of the URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings.
**Returned value**
- URL with the `name` URL parameter removed.
Type: `String`.
**Example**
Query:
``` sql
SELECT
cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', 'a') as url_without_a,
cutURLParameter('http://bigmir.net/?a=b&c=d&e=f#g', ['c', 'e']) as url_without_c_and_e;
```
Result:
``` text
┌─url_without_a────────────────┬─url_without_c_and_e──────┐
│ http://bigmir.net/?c=d&e=f#g │ http://bigmir.net/?a=b#g │
└──────────────────────────────┴──────────────────────────┘
```

View File

@ -11,7 +11,7 @@ sidebar_label: "Профиль настроек"
Syntax:
``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluster_name1]
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
[SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
@ -26,4 +26,4 @@ CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] TO name1 [ON CLUSTER cluste
CREATE SETTINGS PROFILE max_memory_usage_profile SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin
```
<!--hide-->
<!--hide-->

View File

@ -1 +0,0 @@
../../../en/sql-reference/table-functions/format.md

View File

@ -0,0 +1,75 @@
---
slug: /ru/sql-reference/table-functions/format
sidebar_position: 56
sidebar_label: format
---
# format
Extracts a table structure from the data and parses it according to the specified input format.
**Syntax**
``` sql
format(format_name, data)
```
**Parameters**
- `format_name` — The [format](../../interfaces/formats.md#formats) of the data.
- `data` — String literal or constant expression that returns a string containing data in the specified format
**Returned value**
A table with data parsed from the `data` argument according to the specified format, using the extracted schema.
**Examples**
**Query:**
``` sql
:) select * from format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 112}
{"a": "World", "b": 124}
$$)
```
**Result:**
```text
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
│ 112 │ Hello │
│ 124 │ World │
└─────┴───────┘
```
**Query:**
```sql
:) desc format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 112}
{"a": "World", "b": 124}
$$)
```
**Result:**
```text
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
**See Also**
- [Formats](../../interfaces/formats.md)
[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) <!--hide-->

View File

@ -1 +0,0 @@
../../../en/sql-reference/table-functions/format.md

View File

@ -0,0 +1,75 @@
---
slug: /zh/sql-reference/table-functions/format
sidebar_position: 56
sidebar_label: format
---
# format
Extracts a table structure from the data and parses it according to the specified input format.
**Syntax**
``` sql
format(format_name, data)
```
**Parameters**
- `format_name` — The [format](../../interfaces/formats.md#formats) of the data.
- `data` — String literal or constant expression that returns a string containing data in the specified format
**Returned value**
A table with data parsed from the `data` argument according to the specified format, using the extracted schema.
**Examples**
**Query:**
``` sql
:) select * from format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 112}
{"a": "World", "b": 124}
$$)
```
**Result:**
```text
┌───b─┬─a─────┐
│ 111 │ Hello │
│ 123 │ World │
│ 112 │ Hello │
│ 124 │ World │
└─────┴───────┘
```
**Query:**
```sql
:) desc format(JSONEachRow,
$$
{"a": "Hello", "b": 111}
{"a": "World", "b": 123}
{"a": "Hello", "b": 112}
{"a": "World", "b": 124}
$$)
```
**Result:**
```text
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ b │ Nullable(Float64) │ │ │ │ │ │
│ a │ Nullable(String) │ │ │ │ │ │
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
**See Also**
- [Formats](../../interfaces/formats.md)
[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/format) <!--hide-->

View File

@ -111,6 +111,8 @@ EOF
tar -czf "$TARBALL" -C "$OUTPUT_DIR" "$PKG_DIR"
fi
sha512sum "$TARBALL" > "$TARBALL".sha512
rm -r "$PKG_PATH"
}

View File

@ -58,22 +58,52 @@ namespace ErrorCodes
class Benchmark : public Poco::Util::Application
{
public:
Benchmark(unsigned concurrency_, double delay_,
Strings && hosts_, Ports && ports_, bool round_robin_,
bool cumulative_, bool secure_, const String & default_database_,
const String & user_, const String & password_, const String & quota_key_, const String & stage,
bool randomize_, size_t max_iterations_, double max_time_,
const String & json_path_, size_t confidence_,
const String & query_id_, const String & query_to_execute_, bool continue_on_errors_,
bool reconnect_, bool display_client_side_time_, bool print_stacktrace_, const Settings & settings_)
Benchmark(unsigned concurrency_,
double delay_,
Strings && hosts_,
Ports && ports_,
bool round_robin_,
bool cumulative_,
bool secure_,
const String & default_database_,
const String & user_,
const String & password_,
const String & quota_key_,
const String & stage,
bool randomize_,
size_t max_iterations_,
double max_time_,
const String & json_path_,
size_t confidence_,
const String & query_id_,
const String & query_to_execute_,
size_t max_consecutive_errors_,
bool continue_on_errors_,
bool reconnect_,
bool display_client_side_time_,
bool print_stacktrace_,
const Settings & settings_)
:
round_robin(round_robin_), concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
json_path(json_path_), confidence(confidence_), query_id(query_id_),
query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_),
round_robin(round_robin_),
concurrency(concurrency_),
delay(delay_),
queue(concurrency),
randomize(randomize_),
cumulative(cumulative_),
max_iterations(max_iterations_),
max_time(max_time_),
json_path(json_path_),
confidence(confidence_),
query_id(query_id_),
query_to_execute(query_to_execute_),
continue_on_errors(continue_on_errors_),
max_consecutive_errors(max_consecutive_errors_),
reconnect(reconnect_),
display_client_side_time(display_client_side_time_),
print_stacktrace(print_stacktrace_), settings(settings_),
shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
print_stacktrace(print_stacktrace_),
settings(settings_),
shared_context(Context::createShared()),
global_context(Context::createGlobal(shared_context.get())),
pool(concurrency)
{
const auto secure = secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable;
@ -166,6 +196,7 @@ private:
String query_id;
String query_to_execute;
bool continue_on_errors;
size_t max_consecutive_errors;
bool reconnect;
bool display_client_side_time;
bool print_stacktrace;
@ -174,6 +205,8 @@ private:
ContextMutablePtr global_context;
QueryProcessingStage::Enum query_processing_stage;
std::atomic<size_t> consecutive_errors{0};
/// Don't execute new queries after timelimit or SIGINT or exception
std::atomic<bool> shutdown{false};
@ -393,13 +426,14 @@ private:
try
{
execute(connection_entries, query, connection_index);
consecutive_errors = 0;
}
catch (...)
{
std::lock_guard lock(mutex);
std::cerr << "An error occurred while processing the query " << "'" << query << "'"
<< ": " << getCurrentExceptionMessage(false) << std::endl;
if (!continue_on_errors)
if (!(continue_on_errors || max_consecutive_errors > ++consecutive_errors))
{
shutdown = true;
throw;
@ -648,6 +682,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
("stacktrace", "print stack traces of exceptions")
("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
("query_id", value<std::string>()->default_value(""), "")
("max-consecutive-errors", value<size_t>()->default_value(0), "set number of allowed consecutive errors")
("continue_on_errors", "continue testing even if a query fails")
("reconnect", "establish new connection for every query")
("client-side-time", "display the time including network communication instead of server-side time; note that for server versions before 22.8 we always display client-side time")
@ -702,6 +737,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
options["confidence"].as<size_t>(),
options["query_id"].as<std::string>(),
options["query"].as<std::string>(),
options["max-consecutive-errors"].as<size_t>(),
options.count("continue_on_errors"),
options.count("reconnect"),
options.count("client-side-time"),

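The new stop condition is compact enough to be easy to misread, so here is a hedged sketch of it as a free function (the real code updates an atomic member and resets it to zero after each successful query). With the default `--max-consecutive-errors=0` it degenerates to the old `!continue_on_errors` behavior, because `0 > ++errors` is never true:

```cpp
#include <cstddef>
#include <cstdio>

/// Mirrors the condition above: keep running if continue_on_errors is set,
/// or while the consecutive-error counter is still below the limit.
bool shouldShutdown(bool continue_on_errors, size_t max_consecutive_errors, size_t & consecutive_errors)
{
    return !(continue_on_errors || max_consecutive_errors > ++consecutive_errors);
}

int main()
{
    size_t errors = 0;
    /// With --max-consecutive-errors=2 the second consecutive failure
    /// triggers shutdown: prints shutdown=0, then shutdown=1.
    for (int failure = 1; failure <= 2; ++failure)
        std::printf("failure %d -> shutdown=%d\n", failure, int(shouldShutdown(false, 2, errors)));
}
```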
View File

@ -348,17 +348,9 @@ void Client::connect()
}
catch (const Exception & e)
{
/// It is typical when users install ClickHouse, type some password and instantly forget it.
/// This problem can't be fixed with reconnection so it is not attempted
if ((connection_parameters.user.empty() || connection_parameters.user == "default")
&& e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
if (e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
{
std::cerr << std::endl
<< "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl
<< "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl
<< "and deleting this file will reset the password." << std::endl
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl
<< std::endl;
/// This problem can't be fixed with reconnection so it is not attempted
throw;
}
else

View File

@ -670,24 +670,30 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
}
/// Create node to signal that we finished moving
/// Also increment a counter of processed partitions
{
String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id);
zookeeper->set(current_partition_attach_is_done, state_finished, 0);
/// Also increment a counter of processed partitions
const auto state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id);
const auto task_status = task_zookeeper_path + "/status";
/// Try until success
while (true)
{
Coordination::Stat stat;
auto status_json = zookeeper->get(task_zookeeper_path + "/status", &stat);
auto status_json = zookeeper->get(task_status, &stat);
auto statuses = StatusAccumulator::fromJSON(status_json);
/// Increment status for table.
auto status_for_table = (*statuses)[task_table.name_in_config];
status_for_table.processed_partitions_count += 1;
(*statuses)[task_table.name_in_config] = status_for_table;
(*statuses)[task_table.name_in_config].processed_partitions_count += 1;
auto statuses_to_commit = StatusAccumulator::serializeToJSON(statuses);
auto error = zookeeper->trySet(task_zookeeper_path + "/status", statuses_to_commit, stat.version, &stat);
if (error == Coordination::Error::ZOK)
Coordination::Requests ops;
ops.emplace_back(zkutil::makeSetRequest(current_partition_attach_is_done, state_finished, 0));
ops.emplace_back(zkutil::makeSetRequest(task_status, statuses_to_commit, stat.version));
Coordination::Responses responses;
Coordination::Error code = zookeeper->tryMulti(ops, responses);
if (code == Coordination::Error::ZOK)
break;
}
}

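The retry loop above is an optimistic-concurrency pattern: read the status node together with its version, compute the update, and commit both ZooKeeper operations atomically only if the version is still current. An in-process analogue (ours, not ZooKeeper code) is a compare-exchange loop:

```cpp
#include <atomic>
#include <cstdio>

int main()
{
    /// Stand-in for the versioned trySet/tryMulti retry above: re-read,
    /// recompute, and retry until no concurrent writer interfered.
    std::atomic<int> processed_partitions{41};

    int observed = processed_partitions.load();
    while (!processed_partitions.compare_exchange_weak(observed, observed + 1))
    {
        /// compare_exchange_weak refreshed `observed` on failure; the loop
        /// recomputes observed + 1 and tries to commit again.
    }

    std::printf("processed partitions: %d\n", processed_partitions.load());
}
```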
View File

@ -20,7 +20,7 @@
#include <Backups/RestorerFromBackup.h>
#include <Core/Settings.h>
#include <base/defines.h>
#include <base/find_symbols.h>
#include <IO/Operators.h>
#include <Poco/AccessExpireCache.h>
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/split.hpp>
@ -454,9 +454,21 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne
{
tryLogCurrentException(getLogger(), "from: " + address.toString() + ", user: " + credentials.getUserName() + ": Authentication failed");
WriteBufferFromOwnString message;
message << credentials.getUserName() << ": Authentication failed: password is incorrect, or there is no user with such name.";
/// Better exception message for usability.
/// It is typical when users install ClickHouse, type some password and instantly forget it.
if (credentials.getUserName().empty() || credentials.getUserName() == "default")
message << "\n\n"
<< "If you have installed ClickHouse and forgot password you can reset it in the configuration file.\n"
<< "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml\n"
<< "and deleting this file will reset the password.\n"
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed.\n\n";
/// We use the same message for all authentication failures because we don't want to give away any unnecessary information for security reasons,
/// only the log will show the exact reason.
throw Exception(credentials.getUserName() + ": Authentication failed: password is incorrect or there is no user with such name", ErrorCodes::AUTHENTICATION_FAILED);
throw Exception(message.str(), ErrorCodes::AUTHENTICATION_FAILED);
}
}

View File

@ -77,7 +77,10 @@ protected:
static bool getFlag(ConstAggregateDataPtr __restrict place) noexcept
{
return result_is_nullable ? place[0] : true;
if constexpr (result_is_nullable)
return place[0];
else
return true;
}
public:
@ -98,9 +101,10 @@ public:
DataTypePtr getReturnType() const override
{
return result_is_nullable
? makeNullable(nested_function->getReturnType())
: nested_function->getReturnType();
if constexpr (result_is_nullable)
return makeNullable(nested_function->getReturnType());
else
return nested_function->getReturnType();
}
void create(AggregateDataPtr __restrict place) const override
@ -136,8 +140,9 @@ public:
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
if (result_is_nullable && getFlag(rhs))
setFlag(place);
if constexpr (result_is_nullable)
if (getFlag(rhs))
setFlag(place);
nested_function->merge(nestedPlace(place), nestedPlace(rhs), arena);
}
@ -472,7 +477,7 @@ public:
final_flags = std::make_unique<UInt8[]>(row_end);
final_flags_ptr = final_flags.get();
bool included_elements = 0;
size_t included_elements = 0;
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = row_begin; i < row_end; i++)
{

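The `bool included_elements` fix above deserves a worked example: accumulating a count into a `bool` saturates at 1, so any non-zero number of included rows looked like exactly one row. A minimal demonstration (ours):

```cpp
#include <cstddef>
#include <cstdio>

int main()
{
    /// The bug fixed above: a bool accumulator saturates at 1.
    bool count_as_bool = false;
    size_t count = 0;

    for (int i = 0; i < 5; ++i)
    {
        count_as_bool = count_as_bool + 1; /// stays true (1) after the first row
        count += 1;
    }

    /// Prints "bool counter: 1, size_t counter: 5".
    std::printf("bool counter: %d, size_t counter: %zu\n", int(count_as_bool), count);
}
```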
View File

@ -3,6 +3,8 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Interpreters/Context.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
@ -56,7 +58,7 @@ public:
auto & count_distinct_argument_column_typed = count_distinct_argument_column->as<ColumnNode &>();
/// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column
auto subquery = std::make_shared<QueryNode>();
auto subquery = std::make_shared<QueryNode>(Context::createCopy(query_node->getContext()));
subquery->getJoinTree() = query_node->getJoinTree();
subquery->getProjection().getNodes().push_back(count_distinct_argument_column);
subquery->getGroupBy().getNodes().push_back(count_distinct_argument_column);

View File

@ -6,6 +6,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Interpreters/Context.h>
namespace DB
{
@ -16,7 +17,8 @@ namespace
class NormalizeCountVariantsVisitor : public InDepthQueryTreeVisitor<NormalizeCountVariantsVisitor>
{
public:
static void visitImpl(QueryTreeNodePtr & node)
explicit NormalizeCountVariantsVisitor(ContextPtr context_) : context(std::move(context_)) {}
void visitImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || !function_node->isAggregateFunction() || (function_node->getFunctionName() != "count" && function_node->getFunctionName() != "sum"))
@ -39,13 +41,16 @@ public:
}
else if (function_node->getFunctionName() == "sum" &&
first_argument_constant_literal.getType() == Field::Types::UInt64 &&
first_argument_constant_literal.get<UInt64>() == 1)
first_argument_constant_literal.get<UInt64>() == 1 &&
!context->getSettingsRef().aggregate_functions_null_for_empty)
{
resolveAsCountAggregateFunction(*function_node);
function_node->getArguments().getNodes().clear();
}
}
private:
ContextPtr context;
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
auto function_result_type = function_node.getResultType();
@ -59,9 +64,9 @@ private:
}
void NormalizeCountVariantsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr)
void NormalizeCountVariantsPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
NormalizeCountVariantsVisitor visitor;
NormalizeCountVariantsVisitor visitor(context);
visitor.visit(query_tree_node);
}

View File

@ -647,6 +647,11 @@ struct IdentifierResolveScope
subquery_depth = parent_scope->subquery_depth;
context = parent_scope->context;
}
if (auto * union_node = scope_node->as<UnionNode>())
context = union_node->getContext();
else if (auto * query_node = scope_node->as<QueryNode>())
context = query_node->getContext();
}
QueryTreeNodePtr scope_node;
@ -974,7 +979,9 @@ public:
void resolve(QueryTreeNodePtr node, const QueryTreeNodePtr & table_expression, ContextPtr context)
{
IdentifierResolveScope scope(node, nullptr /*parent_scope*/);
scope.context = context;
if (!scope.context)
scope.context = context;
auto node_type = node->getNodeType();
@ -2843,6 +2850,14 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
}
}
/** Nested subqueries cannot access table expressions from the JOIN trees of outer subqueries,
* because that could prevent a table expression from being resolved from a CTE.
*
* Example: WITH a AS (SELECT number FROM numbers(1)), b AS (SELECT number FROM a) SELECT * FROM a as l, b as r;
*/
if (identifier_lookup.isTableExpressionLookup())
identifier_resolve_settings.allow_to_check_join_tree = false;
while (scope_to_check != nullptr)
{
auto lookup_result = tryResolveIdentifier(identifier_lookup, *scope_to_check, identifier_resolve_settings);
@ -4042,7 +4057,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
auto constant_data_type = std::make_shared<DataTypeUInt64>();
auto in_subquery = std::make_shared<QueryNode>();
auto in_subquery = std::make_shared<QueryNode>(Context::createCopy(scope.context));
in_subquery->getProjection().getNodes().push_back(std::make_shared<ConstantNode>(1UL, constant_data_type));
in_subquery->getJoinTree() = exists_subquery_argument;
in_subquery->getLimit() = std::make_shared<ConstantNode>(1UL, constant_data_type);
@ -4095,7 +4110,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
projection_columns.emplace_back(column.name, column.type);
}
auto in_second_argument_query_node = std::make_shared<QueryNode>();
auto in_second_argument_query_node = std::make_shared<QueryNode>(Context::createCopy(scope.context));
in_second_argument_query_node->setIsSubquery(true);
in_second_argument_query_node->getProjectionNode() = std::move(column_nodes_to_select);
in_second_argument_query_node->getJoinTree() = std::move(in_second_argument);
@ -5756,14 +5771,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
max_subquery_depth);
auto & query_node_typed = query_node->as<QueryNode &>();
if (query_node_typed.hasSettingsChanges())
{
auto updated_scope_context = Context::createCopy(scope.context);
updated_scope_context->applySettingsChanges(query_node_typed.getSettingsChanges());
scope.context = std::move(updated_scope_context);
}
const auto & settings = scope.context->getSettingsRef();
if (settings.group_by_use_nulls)
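The block removed above is the counterpart of the builder change later in this commit: per-query SETTINGS are now applied once, when the query node's context copy is created during query tree building, so resolution no longer re-applies them. A hedged sketch of the new ownership model (fragment; assumes the surrounding builder scope):
/// Build time (see QueryTreeBuilder::buildSelectExpression below):
auto updated_context = Context::createCopy(context);       /// per-query context copy
updated_context->applySettingsChanges(settings_changes);   /// SETTINGS applied once, here
auto query_node = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
/// Resolve time: the analyzer simply reads settings from the node's context.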

View File

@ -56,7 +56,7 @@ public:
if (!isInt64OrUInt64FieldType(constant_value_literal.getType()))
return;
if (constant_value_literal.get<UInt64>() != 1)
if (constant_value_literal.get<UInt64>() != 1 || context->getSettingsRef().aggregate_functions_null_for_empty)
return;
function_node_arguments_nodes[0] = std::move(function_node_arguments_nodes[1]);

View File

@ -21,8 +21,10 @@
namespace DB
{
QueryNode::QueryNode()
QueryNode::QueryNode(ContextMutablePtr context_, SettingsChanges settings_changes_)
: IQueryTreeNode(children_size)
, context(std::move(context_))
, settings_changes(std::move(settings_changes_))
{
children[with_child_index] = std::make_shared<ListNode>();
children[projection_child_index] = std::make_shared<ListNode>();
@ -32,6 +34,10 @@ QueryNode::QueryNode()
children[limit_by_child_index] = std::make_shared<ListNode>();
}
QueryNode::QueryNode(ContextMutablePtr context_)
: QueryNode(context_, {} /*settings_changes*/)
{}
void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const
{
buffer << std::string(indent, ' ') << "QUERY id: " << format_state.getNodeId(this);
@ -222,7 +228,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
QueryTreeNodePtr QueryNode::cloneImpl() const
{
auto result_query_node = std::make_shared<QueryNode>();
auto result_query_node = std::make_shared<QueryNode>(context);
result_query_node->is_subquery = is_subquery;
result_query_node->is_cte = is_cte;

View File

@ -10,6 +10,8 @@
#include <Analyzer/ListNode.h>
#include <Analyzer/TableExpressionModifiers.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
@ -61,7 +63,41 @@ using QueryNodePtr = std::shared_ptr<QueryNode>;
class QueryNode final : public IQueryTreeNode
{
public:
explicit QueryNode();
/// Construct query node with context and changed settings
explicit QueryNode(ContextMutablePtr context_, SettingsChanges settings_changes_);
/// Construct query node with context
explicit QueryNode(ContextMutablePtr context_);
/// Get context
ContextPtr getContext() const
{
return context;
}
/// Get mutable context, const overload
const ContextMutablePtr & getMutableContext() const
{
return context;
}
/// Get mutable context
ContextMutablePtr & getMutableContext()
{
return context;
}
/// Returns true if query node has settings changes, false otherwise
bool hasSettingsChanges() const
{
return !settings_changes.empty();
}
/// Get query node settings changes
const SettingsChanges & getSettingsChanges() const
{
return settings_changes;
}
/// Returns true if query node is subquery, false otherwise
bool isSubquery() const
@ -513,24 +549,6 @@ public:
return children[offset_child_index];
}
/// Returns true if query node has settings changes specified, false otherwise
bool hasSettingsChanges() const
{
return !settings_changes.empty();
}
/// Get query node settings changes
const SettingsChanges & getSettingsChanges() const
{
return settings_changes;
}
/// Set query node settings changes value
void setSettingsChanges(SettingsChanges settings_changes_value)
{
settings_changes = std::move(settings_changes_value);
}
/// Get query node projection columns
const NamesAndTypes & getProjectionColumns() const
{
@ -572,6 +590,7 @@ private:
std::string cte_name;
NamesAndTypes projection_columns;
ContextMutablePtr context;
SettingsChanges settings_changes;
static constexpr size_t with_child_index = 0;
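Taken together, the new members make a query node self-describing with respect to its context and settings. A hedged usage sketch (assuming a tree produced by buildQueryTree):
auto query_tree = buildQueryTree(ast, context);
if (auto * query_node = query_tree->as<QueryNode>())
{
    ContextPtr node_context = query_node->getContext();    /// per-node context copy
    if (query_node->hasSettingsChanges())
    {
        /// SETTINGS from the query text; already applied to node_context at build time
        const SettingsChanges & changes = query_node->getSettingsChanges();
        (void)changes;
    }
}
/// UnionNode (further below) gains the same getContext()/getMutableContext() accessors.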

View File

@ -77,75 +77,90 @@ public:
}
private:
QueryTreeNodePtr buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectOrUnionExpression(const ASTPtr & select_or_union_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const;
QueryTreeNodePtr buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectWithUnionExpression(const ASTPtr & select_with_union_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const;
QueryTreeNodePtr buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const;
QueryTreeNodePtr buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const;
QueryTreeNodePtr buildSelectExpression(const ASTPtr & select_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const;
QueryTreeNodePtr buildSortList(const ASTPtr & order_by_expression_list) const;
QueryTreeNodePtr buildSortList(const ASTPtr & order_by_expression_list, const ContextPtr & context) const;
QueryTreeNodePtr buildInterpolateList(const ASTPtr & interpolate_expression_list) const;
QueryTreeNodePtr buildInterpolateList(const ASTPtr & interpolate_expression_list, const ContextPtr & context) const;
QueryTreeNodePtr buildWindowList(const ASTPtr & window_definition_list) const;
QueryTreeNodePtr buildWindowList(const ASTPtr & window_definition_list, const ContextPtr & context) const;
QueryTreeNodePtr buildExpressionList(const ASTPtr & expression_list) const;
QueryTreeNodePtr buildExpressionList(const ASTPtr & expression_list, const ContextPtr & context) const;
QueryTreeNodePtr buildExpression(const ASTPtr & expression) const;
QueryTreeNodePtr buildExpression(const ASTPtr & expression, const ContextPtr & context) const;
QueryTreeNodePtr buildWindow(const ASTPtr & window_definition) const;
QueryTreeNodePtr buildWindow(const ASTPtr & window_definition, const ContextPtr & context) const;
QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query) const;
QueryTreeNodePtr buildJoinTree(const ASTPtr & tables_in_select_query, const ContextPtr & context) const;
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const;
ColumnTransformersNodes buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const;
ASTPtr query;
ContextPtr context;
QueryTreeNodePtr query_tree_node;
};
QueryTreeBuilder::QueryTreeBuilder(ASTPtr query_, ContextPtr context_)
: query(query_->clone())
, context(std::move(context_))
{
if (query->as<ASTSelectWithUnionQuery>() ||
query->as<ASTSelectIntersectExceptQuery>() ||
query->as<ASTSelectQuery>())
query_tree_node = buildSelectOrUnionExpression(query, false /*is_subquery*/, {} /*cte_name*/);
query_tree_node = buildSelectOrUnionExpression(query, false /*is_subquery*/, {} /*cte_name*/, context_);
else if (query->as<ASTExpressionList>())
query_tree_node = buildExpressionList(query);
query_tree_node = buildExpressionList(query, context_);
else
query_tree_node = buildExpression(query);
query_tree_node = buildExpression(query, context_);
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectOrUnionExpression(const ASTPtr & select_or_union_query, bool is_subquery, const std::string & cte_name) const
QueryTreeNodePtr QueryTreeBuilder::buildSelectOrUnionExpression(const ASTPtr & select_or_union_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const
{
QueryTreeNodePtr query_node;
if (select_or_union_query->as<ASTSelectWithUnionQuery>())
query_node = buildSelectWithUnionExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/);
query_node = buildSelectWithUnionExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/, context);
else if (select_or_union_query->as<ASTSelectIntersectExceptQuery>())
query_node = buildSelectIntersectExceptQuery(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/);
query_node = buildSelectIntersectExceptQuery(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/, context);
else if (select_or_union_query->as<ASTSelectQuery>())
query_node = buildSelectExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/);
query_node = buildSelectExpression(select_or_union_query, is_subquery /*is_subquery*/, cte_name /*cte_name*/, context);
else
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "SELECT or UNION query {} is not supported", select_or_union_query->formatForErrorMessage());
return query_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr & select_with_union_query, bool is_subquery, const std::string & cte_name) const
QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr & select_with_union_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const
{
auto & select_with_union_query_typed = select_with_union_query->as<ASTSelectWithUnionQuery &>();
auto & select_lists = select_with_union_query_typed.list_of_selects->as<ASTExpressionList &>();
if (select_lists.children.size() == 1)
return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name);
return buildSelectOrUnionExpression(select_lists.children[0], is_subquery, cte_name, context);
auto union_node = std::make_shared<UnionNode>(select_with_union_query_typed.union_mode);
auto union_node = std::make_shared<UnionNode>(Context::createCopy(context), select_with_union_query_typed.union_mode);
union_node->setIsSubquery(is_subquery);
union_node->setIsCTE(!cte_name.empty());
union_node->setCTEName(cte_name);
@ -156,20 +171,23 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectWithUnionExpression(const ASTPtr &
for (size_t i = 0; i < select_lists_children_size; ++i)
{
auto & select_list_node = select_lists.children[i];
QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/);
QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/, context);
union_node->getQueries().getNodes().push_back(std::move(query_node));
}
return union_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query, bool is_subquery, const std::string & cte_name) const
QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr & select_intersect_except_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const
{
auto & select_intersect_except_query_typed = select_intersect_except_query->as<ASTSelectIntersectExceptQuery &>();
auto select_lists = select_intersect_except_query_typed.getListOfSelects();
if (select_lists.size() == 1)
return buildSelectExpression(select_lists[0], is_subquery, cte_name);
return buildSelectExpression(select_lists[0], is_subquery, cte_name, context);
SelectUnionMode union_mode;
if (select_intersect_except_query_typed.final_operator == ASTSelectIntersectExceptQuery::Operator::INTERSECT_ALL)
@ -183,7 +201,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "UNION type is not initialized");
auto union_node = std::make_shared<UnionNode>(union_mode);
auto union_node = std::make_shared<UnionNode>(Context::createCopy(context), union_mode);
union_node->setIsSubquery(is_subquery);
union_node->setIsCTE(!cte_name.empty());
union_node->setCTEName(cte_name);
@ -194,17 +212,32 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectIntersectExceptQuery(const ASTPtr
for (size_t i = 0; i < select_lists_size; ++i)
{
auto & select_list_node = select_lists[i];
QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/);
QueryTreeNodePtr query_node = buildSelectOrUnionExpression(select_list_node, false /*is_subquery*/, {} /*cte_name*/, context);
union_node->getQueries().getNodes().push_back(std::move(query_node));
}
return union_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_query, bool is_subquery, const std::string & cte_name) const
QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_query,
bool is_subquery,
const std::string & cte_name,
const ContextPtr & context) const
{
const auto & select_query_typed = select_query->as<ASTSelectQuery &>();
auto current_query_tree = std::make_shared<QueryNode>();
auto updated_context = Context::createCopy(context);
auto select_settings = select_query_typed.settings();
SettingsChanges settings_changes;
if (select_settings)
{
auto & set_query = select_settings->as<ASTSetQuery &>();
updated_context->applySettingsChanges(set_query.changes);
settings_changes = set_query.changes;
}
auto current_query_tree = std::make_shared<QueryNode>(std::move(updated_context), std::move(settings_changes));
current_query_tree->setIsSubquery(is_subquery);
current_query_tree->setIsCTE(!cte_name.empty());
@ -218,30 +251,25 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setOriginalAST(select_query);
auto select_settings = select_query_typed.settings();
if (select_settings)
{
auto & set_query = select_settings->as<ASTSetQuery &>();
current_query_tree->setSettingsChanges(set_query.changes);
}
auto current_context = current_query_tree->getContext();
current_query_tree->getJoinTree() = buildJoinTree(select_query_typed.tables());
current_query_tree->getJoinTree() = buildJoinTree(select_query_typed.tables(), current_context);
auto select_with_list = select_query_typed.with();
if (select_with_list)
current_query_tree->getWithNode() = buildExpressionList(select_with_list);
current_query_tree->getWithNode() = buildExpressionList(select_with_list, current_context);
auto select_expression_list = select_query_typed.select();
if (select_expression_list)
current_query_tree->getProjectionNode() = buildExpressionList(select_expression_list);
current_query_tree->getProjectionNode() = buildExpressionList(select_expression_list, current_context);
auto prewhere_expression = select_query_typed.prewhere();
if (prewhere_expression)
current_query_tree->getPrewhere() = buildExpression(prewhere_expression);
current_query_tree->getPrewhere() = buildExpression(prewhere_expression, current_context);
auto where_expression = select_query_typed.where();
if (where_expression)
current_query_tree->getWhere() = buildExpression(where_expression);
current_query_tree->getWhere() = buildExpression(where_expression, current_context);
auto group_by_list = select_query_typed.groupBy();
if (group_by_list)
@ -254,56 +282,56 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
for (auto & grouping_sets_keys : group_by_children)
{
auto grouping_sets_keys_list_node = buildExpressionList(grouping_sets_keys);
auto grouping_sets_keys_list_node = buildExpressionList(grouping_sets_keys, current_context);
current_query_tree->getGroupBy().getNodes().emplace_back(std::move(grouping_sets_keys_list_node));
}
}
else
{
current_query_tree->getGroupByNode() = buildExpressionList(group_by_list);
current_query_tree->getGroupByNode() = buildExpressionList(group_by_list, current_context);
}
}
auto having_expression = select_query_typed.having();
if (having_expression)
current_query_tree->getHaving() = buildExpression(having_expression);
current_query_tree->getHaving() = buildExpression(having_expression, current_context);
auto window_list = select_query_typed.window();
if (window_list)
current_query_tree->getWindowNode() = buildWindowList(window_list);
current_query_tree->getWindowNode() = buildWindowList(window_list, current_context);
auto select_order_by_list = select_query_typed.orderBy();
if (select_order_by_list)
current_query_tree->getOrderByNode() = buildSortList(select_order_by_list);
current_query_tree->getOrderByNode() = buildSortList(select_order_by_list, current_context);
auto interpolate_list = select_query_typed.interpolate();
if (interpolate_list)
current_query_tree->getInterpolate() = buildInterpolateList(interpolate_list);
current_query_tree->getInterpolate() = buildInterpolateList(interpolate_list, current_context);
auto select_limit_by_limit = select_query_typed.limitByLength();
if (select_limit_by_limit)
current_query_tree->getLimitByLimit() = buildExpression(select_limit_by_limit);
current_query_tree->getLimitByLimit() = buildExpression(select_limit_by_limit, current_context);
auto select_limit_by_offset = select_query_typed.limitOffset();
if (select_limit_by_offset)
current_query_tree->getLimitByOffset() = buildExpression(select_limit_by_offset);
current_query_tree->getLimitByOffset() = buildExpression(select_limit_by_offset, current_context);
auto select_limit_by = select_query_typed.limitBy();
if (select_limit_by)
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by);
current_query_tree->getLimitByNode() = buildExpressionList(select_limit_by, current_context);
auto select_limit = select_query_typed.limitLength();
if (select_limit)
current_query_tree->getLimit() = buildExpression(select_limit);
current_query_tree->getLimit() = buildExpression(select_limit, current_context);
auto select_offset = select_query_typed.limitOffset();
if (select_offset)
current_query_tree->getOffset() = buildExpression(select_offset);
current_query_tree->getOffset() = buildExpression(select_offset, current_context);
return current_query_tree;
}
QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_expression_list) const
QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_expression_list, const ContextPtr & context) const
{
auto list_node = std::make_shared<ListNode>();
@ -324,7 +352,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
collator = std::make_shared<Collator>(order_by_element.collation->as<ASTLiteral &>().value.get<String &>());
const auto & sort_expression_ast = order_by_element.children.at(0);
auto sort_expression = buildExpression(sort_expression_ast);
auto sort_expression = buildExpression(sort_expression_ast, context);
auto sort_node = std::make_shared<SortNode>(std::move(sort_expression),
sort_direction,
nulls_sort_direction,
@ -332,11 +360,11 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
order_by_element.with_fill);
if (order_by_element.fill_from)
sort_node->getFillFrom() = buildExpression(order_by_element.fill_from);
sort_node->getFillFrom() = buildExpression(order_by_element.fill_from, context);
if (order_by_element.fill_to)
sort_node->getFillTo() = buildExpression(order_by_element.fill_to);
sort_node->getFillTo() = buildExpression(order_by_element.fill_to, context);
if (order_by_element.fill_step)
sort_node->getFillStep() = buildExpression(order_by_element.fill_step);
sort_node->getFillStep() = buildExpression(order_by_element.fill_step, context);
list_node->getNodes().push_back(std::move(sort_node));
}
@ -344,7 +372,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express
return list_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildInterpolateList(const ASTPtr & interpolate_expression_list) const
QueryTreeNodePtr QueryTreeBuilder::buildInterpolateList(const ASTPtr & interpolate_expression_list, const ContextPtr & context) const
{
auto list_node = std::make_shared<ListNode>();
@ -355,7 +383,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildInterpolateList(const ASTPtr & interpola
{
const auto & interpolate_element = expression->as<const ASTInterpolateElement &>();
auto expression_to_interpolate = std::make_shared<IdentifierNode>(Identifier(interpolate_element.column));
auto interpolate_expression = buildExpression(interpolate_element.expr);
auto interpolate_expression = buildExpression(interpolate_element.expr, context);
auto interpolate_node = std::make_shared<InterpolateNode>(std::move(expression_to_interpolate), std::move(interpolate_expression));
list_node->getNodes().push_back(std::move(interpolate_node));
@ -364,7 +392,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildInterpolateList(const ASTPtr & interpola
return list_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildWindowList(const ASTPtr & window_definition_list) const
QueryTreeNodePtr QueryTreeBuilder::buildWindowList(const ASTPtr & window_definition_list, const ContextPtr & context) const
{
auto list_node = std::make_shared<ListNode>();
@ -375,7 +403,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildWindowList(const ASTPtr & window_definit
{
const auto & window_list_element_typed = window_list_element->as<const ASTWindowListElement &>();
auto window_node = buildWindow(window_list_element_typed.definition);
auto window_node = buildWindow(window_list_element_typed.definition, context);
window_node->setAlias(window_list_element_typed.name);
list_node->getNodes().push_back(std::move(window_node));
@ -384,7 +412,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildWindowList(const ASTPtr & window_definit
return list_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildExpressionList(const ASTPtr & expression_list) const
QueryTreeNodePtr QueryTreeBuilder::buildExpressionList(const ASTPtr & expression_list, const ContextPtr & context) const
{
auto list_node = std::make_shared<ListNode>();
@ -393,14 +421,14 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpressionList(const ASTPtr & expression
for (auto & expression : expression_list_typed.children)
{
auto expression_node = buildExpression(expression);
auto expression_node = buildExpression(expression, context);
list_node->getNodes().push_back(std::move(expression_node));
}
return list_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) const
QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, const ContextPtr & context) const
{
QueryTreeNodePtr result;
@ -411,13 +439,13 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
}
else if (const auto * asterisk = expression->as<ASTAsterisk>())
{
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(std::move(column_transformers));
}
else if (const auto * qualified_asterisk = expression->as<ASTQualifiedAsterisk>())
{
auto & qualified_identifier = qualified_asterisk->children.at(0)->as<ASTTableIdentifier &>();
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), std::move(column_transformers));
}
else if (const auto * ast_literal = expression->as<ASTLiteral>())
@ -466,7 +494,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
}
const auto & lambda_expression = lambda_arguments_and_expression.at(1);
auto lambda_expression_node = buildExpression(lambda_expression);
auto lambda_expression_node = buildExpression(lambda_expression, context);
result = std::make_shared<LambdaNode>(std::move(lambda_arguments), std::move(lambda_expression_node));
}
@ -478,20 +506,20 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
{
const auto & function_parameters_list = function->parameters->as<ASTExpressionList>()->children;
for (const auto & argument : function_parameters_list)
function_node->getParameters().getNodes().push_back(buildExpression(argument));
function_node->getParameters().getNodes().push_back(buildExpression(argument, context));
}
if (function->arguments)
{
const auto & function_arguments_list = function->arguments->as<ASTExpressionList>()->children;
for (const auto & argument : function_arguments_list)
function_node->getArguments().getNodes().push_back(buildExpression(argument));
function_node->getArguments().getNodes().push_back(buildExpression(argument, context));
}
if (function->is_window_function)
{
if (function->window_definition)
function_node->getWindowNode() = buildWindow(function->window_definition);
function_node->getWindowNode() = buildWindow(function->window_definition, context);
else
function_node->getWindowNode() = std::make_shared<IdentifierNode>(Identifier(function->window_name));
}
@ -502,20 +530,20 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
else if (const auto * subquery = expression->as<ASTSubquery>())
{
auto subquery_query = subquery->children[0];
auto query_node = buildSelectWithUnionExpression(subquery_query, true /*is_subquery*/, {} /*cte_name*/);
auto query_node = buildSelectWithUnionExpression(subquery_query, true /*is_subquery*/, {} /*cte_name*/, context);
result = std::move(query_node);
}
else if (const auto * with_element = expression->as<ASTWithElement>())
{
auto with_element_subquery = with_element->subquery->as<ASTSubquery &>().children.at(0);
auto query_node = buildSelectWithUnionExpression(with_element_subquery, true /*is_subquery*/, with_element->name /*cte_name*/);
auto query_node = buildSelectWithUnionExpression(with_element_subquery, true /*is_subquery*/, with_element->name /*cte_name*/, context);
result = std::move(query_node);
}
else if (const auto * columns_regexp_matcher = expression->as<ASTColumnsRegexpMatcher>())
{
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * columns_list_matcher = expression->as<ASTColumnsListMatcher>())
@ -529,13 +557,13 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 0 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(std::move(column_list_identifiers), std::move(column_transformers));
}
else if (const auto * qualified_columns_regexp_matcher = expression->as<ASTQualifiedColumnsRegexpMatcher>())
{
auto & qualified_identifier = qualified_columns_regexp_matcher->children.at(0)->as<ASTTableIdentifier &>();
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), qualified_columns_regexp_matcher->getMatcher(), std::move(column_transformers));
}
else if (const auto * qualified_columns_list_matcher = expression->as<ASTQualifiedColumnsListMatcher>())
@ -551,7 +579,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
column_list_identifiers.emplace_back(Identifier{column_list_identifier.name_parts});
}
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/);
auto column_transformers = buildColumnTransformers(expression, 1 /*start_child_index*/, context);
result = std::make_shared<MatcherNode>(Identifier(qualified_identifier.name_parts), std::move(column_list_identifiers), std::move(column_transformers));
}
else
@ -567,7 +595,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression) co
return result;
}
QueryTreeNodePtr QueryTreeBuilder::buildWindow(const ASTPtr & window_definition) const
QueryTreeNodePtr QueryTreeBuilder::buildWindow(const ASTPtr & window_definition, const ContextPtr & context) const
{
const auto & window_definition_typed = window_definition->as<const ASTWindowDefinition &>();
WindowFrame window_frame;
@ -586,23 +614,23 @@ QueryTreeNodePtr QueryTreeBuilder::buildWindow(const ASTPtr & window_definition)
window_node->setParentWindowName(window_definition_typed.parent_window_name);
if (window_definition_typed.partition_by)
window_node->getPartitionByNode() = buildExpressionList(window_definition_typed.partition_by);
window_node->getPartitionByNode() = buildExpressionList(window_definition_typed.partition_by, context);
if (window_definition_typed.order_by)
window_node->getOrderByNode() = buildSortList(window_definition_typed.order_by);
window_node->getOrderByNode() = buildSortList(window_definition_typed.order_by, context);
if (window_definition_typed.frame_begin_offset)
window_node->getFrameBeginOffsetNode() = buildExpression(window_definition_typed.frame_begin_offset);
window_node->getFrameBeginOffsetNode() = buildExpression(window_definition_typed.frame_begin_offset, context);
if (window_definition_typed.frame_end_offset)
window_node->getFrameEndOffsetNode() = buildExpression(window_definition_typed.frame_end_offset);
window_node->getFrameEndOffsetNode() = buildExpression(window_definition_typed.frame_end_offset, context);
window_node->setOriginalAST(window_definition);
return window_node;
}
QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select_query) const
QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select_query, const ContextPtr & context) const
{
if (!tables_in_select_query)
{
@ -668,7 +696,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
auto & subquery_expression = table_expression.subquery->as<ASTSubquery &>();
const auto & select_with_union_query = subquery_expression.children[0];
auto node = buildSelectWithUnionExpression(select_with_union_query, true /*is_subquery*/, {} /*cte_name*/);
auto node = buildSelectWithUnionExpression(select_with_union_query, true /*is_subquery*/, {} /*cte_name*/, context);
node->setAlias(subquery_expression.tryGetAlias());
node->setOriginalAST(select_with_union_query);
@ -694,9 +722,9 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
for (const auto & argument : function_arguments_list)
{
if (argument->as<ASTSelectQuery>() || argument->as<ASTSelectWithUnionQuery>() || argument->as<ASTSelectIntersectExceptQuery>())
node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/));
node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/, context));
else
node->getArguments().getNodes().push_back(buildExpression(argument));
node->getArguments().getNodes().push_back(buildExpression(argument, context));
}
}
@ -726,9 +754,9 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
QueryTreeNodePtr join_expression;
if (table_join.using_expression_list)
join_expression = buildExpressionList(table_join.using_expression_list);
join_expression = buildExpressionList(table_join.using_expression_list, context);
else if (table_join.on_expression)
join_expression = buildExpression(table_join.on_expression);
join_expression = buildExpression(table_join.on_expression, context);
const auto & settings = context->getSettingsRef();
auto join_default_strictness = settings.join_default_strictness;
@ -785,7 +813,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
auto last_table_expression = std::move(table_expressions.back());
table_expressions.pop_back();
auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list);
auto array_join_expressions_list = buildExpressionList(array_join_expression.expression_list, context);
auto array_join_node = std::make_shared<ArrayJoinNode>(std::move(last_table_expression), std::move(array_join_expressions_list), is_left_array_join);
/** Original AST is not set because it will contain only array join part and does
@ -805,7 +833,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
}
ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index) const
ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr & matcher_expression, size_t start_child_index, const ContextPtr & context) const
{
ColumnTransformersNodes column_transformers;
size_t children_size = matcher_expression->children.size();
@ -818,14 +846,14 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr &
{
if (apply_transformer->lambda)
{
auto lambda_query_tree_node = buildExpression(apply_transformer->lambda);
auto lambda_query_tree_node = buildExpression(apply_transformer->lambda, context);
column_transformers.emplace_back(std::make_shared<ApplyColumnTransformerNode>(std::move(lambda_query_tree_node)));
}
else
{
auto function_node = std::make_shared<FunctionNode>(apply_transformer->func_name);
if (apply_transformer->parameters)
function_node->getParametersNode() = buildExpressionList(apply_transformer->parameters);
function_node->getParametersNode() = buildExpressionList(apply_transformer->parameters, context);
column_transformers.emplace_back(std::make_shared<ApplyColumnTransformerNode>(std::move(function_node)));
}
@ -856,7 +884,7 @@ ColumnTransformersNodes QueryTreeBuilder::buildColumnTransformers(const ASTPtr &
for (const auto & replace_transformer_child : replace_transformer->children)
{
auto & replacement = replace_transformer_child->as<ASTColumnsReplaceTransformer::Replacement &>();
replacements.emplace_back(ReplaceColumnTransformerNode::Replacement{replacement.name, buildExpression(replacement.expr)});
replacements.emplace_back(ReplaceColumnTransformerNode::Replacement{replacement.name, buildExpression(replacement.expr, context)});
}
column_transformers.emplace_back(std::make_shared<ReplaceColumnTransformerNode>(replacements, replace_transformer->is_strict));
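Net effect of the context threading above: every expression under a SELECT is now built against that SELECT's own context, so a per-query SETTINGS clause influences how its whole subtree is constructed. A hedged SQL-level illustration in comment form:
/// SELECT * FROM (SELECT * FROM t1 JOIN t2 USING (k) SETTINGS join_default_strictness = 'ALL');
/// The inner SELECT gets its own context copy with the changed setting, so
/// buildJoinTree for that subquery sees join_default_strictness = ALL, while
/// the outer query keeps the session value.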

View File

@ -13,6 +13,8 @@ namespace DB
* AST that represents a query: ASTSelectWithUnionQuery, ASTSelectIntersectExceptQuery, ASTSelectQuery.
* AST that represents a list of expressions: ASTExpressionList.
* AST that represents an expression: ASTIdentifier, ASTAsterisk, ASTLiteral, ASTFunction.
*
* For QUERY and UNION nodes, contexts are created with respect to the specified SETTINGS.
*/
QueryTreeNodePtr buildQueryTree(ASTPtr query, ContextPtr context);

View File

@ -3,8 +3,6 @@
#include <Common/SipHash.h>
#include <Common/FieldVisitorToString.h>
#include <Core/NamesAndTypes.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
@ -18,9 +16,12 @@
#include <Parsers/ASTFunction.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/NamesAndTypes.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/Context.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/Utils.h>
@ -33,8 +34,9 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
UnionNode::UnionNode(SelectUnionMode union_mode_)
UnionNode::UnionNode(ContextMutablePtr context_, SelectUnionMode union_mode_)
: IQueryTreeNode(children_size)
, context(std::move(context_))
, union_mode(union_mode_)
{
if (union_mode == SelectUnionMode::UNION_DEFAULT ||
@ -129,7 +131,7 @@ void UnionNode::updateTreeHashImpl(HashState & state) const
QueryTreeNodePtr UnionNode::cloneImpl() const
{
auto result_union_node = std::make_shared<UnionNode>(union_mode);
auto result_union_node = std::make_shared<UnionNode>(context, union_mode);
result_union_node->is_subquery = is_subquery;
result_union_node->is_cte = is_cte;

View File

@ -3,12 +3,14 @@
#include <Core/NamesAndTypes.h>
#include <Core/Field.h>
#include <Parsers/SelectUnionMode.h>
#include <Analyzer/Identifier.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Analyzer/TableExpressionModifiers.h>
#include <Parsers/SelectUnionMode.h>
#include <Interpreters/Context_fwd.h>
namespace DB
{
@ -37,8 +39,26 @@ using UnionNodePtr = std::shared_ptr<UnionNode>;
class UnionNode final : public IQueryTreeNode
{
public:
/// Construct union node with normalized union mode
explicit UnionNode(SelectUnionMode union_mode_);
/// Construct union node with context and normalized union mode
explicit UnionNode(ContextMutablePtr context_, SelectUnionMode union_mode_);
/// Get context
ContextPtr getContext() const
{
return context;
}
/// Get mutable context, const overload
const ContextMutablePtr & getMutableContext() const
{
return context;
}
/// Get mutable context
ContextMutablePtr & getMutableContext()
{
return context;
}
/// Returns true if union node is subquery, false otherwise
bool isSubquery() const
@ -129,6 +149,7 @@ private:
bool is_subquery = false;
bool is_cte = false;
std::string cte_name;
ContextMutablePtr context;
SelectUnionMode union_mode;
static constexpr size_t queries_child_index = 0;

View File

@ -1,5 +1,9 @@
add_subdirectory(StringUtils)
if (ENABLE_BENCHMARKS)
add_subdirectory(benchmarks)
endif()
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()

View File

@ -3,7 +3,6 @@
#include "Epoll.h"
#include <Common/Exception.h>
#include <unistd.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -70,9 +69,6 @@ size_t Epoll::getManyReady(int max_events, epoll_event * events_out, bool blocki
if (ready_size == -1 && errno != EINTR)
throwFromErrno("Error in epoll_wait", DB::ErrorCodes::EPOLL_ERROR);
if (errno == EINTR)
LOG_TEST(&Poco::Logger::get("Epoll"), "EINTR");
}
while (ready_size <= 0 && (ready_size != 0 || blocking));

View File

@ -164,8 +164,13 @@ public:
void get128(char * out)
{
finalize();
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
unalignedStore<UInt64>(out + 8, v0 ^ v1);
unalignedStore<UInt64>(out, v2 ^ v3);
#else
unalignedStore<UInt64>(out, v0 ^ v1);
unalignedStore<UInt64>(out + 8, v2 ^ v3);
#endif
}
template <typename T>
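The swapped stores keep get128 numerically stable across byte orders: reading the 16 output bytes back as a native 128-bit integer yields the same value on little- and big-endian hosts. Worked layout, as a hedged sketch:
/// Let L = v0 ^ v1 and H = v2 ^ v3. After this change the output decodes as
/// (H << 64) | L on both platforms: little-endian stores L at out[0..7]
/// (least significant bytes first), big-endian stores H at out[0..7]
/// (most significant bytes first), so a native UInt128 load agrees.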

View File

@ -6,8 +6,6 @@
#include <fcntl.h>
#include <unistd.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -72,8 +70,6 @@ void TimerDescriptor::drain() const
if (errno != EINTR)
throwFromErrno("Cannot drain timer_fd", ErrorCodes::CANNOT_READ_FROM_SOCKET);
else
LOG_TEST(&Poco::Logger::get("TimerDescriptor"), "EINTR");
}
}
}

View File

@ -0,0 +1,9 @@
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
ch_contrib::gbenchmark_all
dbms
ch_contrib::abseil_swiss_tables
ch_contrib::sparsehash
ch_contrib::wyhash
ch_contrib::farmhash
ch_contrib::xxHash)

View File

@ -1,5 +1,8 @@
#include <iostream>
#include <benchmark/benchmark.h>
#include <iomanip>
#include <iostream>
#include <random>
#include <vector>
#include <unordered_map>
@ -13,12 +16,23 @@
//#define DBMS_HASH_MAP_COUNT_COLLISIONS
//#define DBMS_HASH_MAP_DEBUG_RESIZES
#include <base/types.h>
#include <IO/ReadBufferFromFile.h>
#include <farmhash.h>
#include <wyhash.h>
#include <Compression/CompressedReadBuffer.h>
#include <IO/ReadBufferFromFile.h>
#include <base/types.h>
#include <Common/HashTable/HashMap.h>
#include <Common/SipHash.h>
#include <pcg-random/pcg_random.hpp>
#include <Common/randomSeed.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wused-but-marked-unused"
#endif
#include <xxhash.h>
using Key = UInt64;
using Value = UInt64;
@ -282,98 +296,91 @@ namespace Hashes
return res;
}
};
struct FarmHash
{
size_t operator()(Key x) const { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(reinterpret_cast<const char *>(&x), sizeof(x)); }
};
struct WyHash
{
size_t operator()(Key x) const { return wyhash(reinterpret_cast<const char *>(&x), sizeof(x), 0, _wyp); }
};
struct XXH3Hash
{
size_t operator()(Key x) const { return XXH_INLINE_XXH3_64bits(reinterpret_cast<const char *>(&x), sizeof(x)); }
};
}
template <template <typename...> class Map, typename Hash>
void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash> &)> init = {})
{
Stopwatch watch;
Map<Key, Value, Hash> map;
if (init)
init(map);
for (const auto * end = data + size; data < end; ++data)
++map[*data];
watch.stop();
std::cerr << __PRETTY_FUNCTION__
<< ":\nElapsed: " << watch.elapsedSeconds()
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
<< ", map size: " << map.size() << "\n";
}
template <template <typename...> class Map, typename Init>
void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
template <template <typename...> typename Map, typename Hash>
struct TestRndInput : public benchmark::Fixture
{
test<Map, Hashes::IdentityHash>(data, size, init);
test<Map, Hashes::SimpleMultiplyHash>(data, size, init);
test<Map, Hashes::MultiplyAndMixHash>(data, size, init);
test<Map, Hashes::MixMultiplyMixHash>(data, size, init);
test<Map, Hashes::MurMurMixHash>(data, size, init);
test<Map, Hashes::MixAllBitsHash>(data, size, init);
test<Map, Hashes::IntHash32>(data, size, init);
test<Map, Hashes::ArcadiaNumericHash>(data, size, init);
test<Map, Hashes::MurMurButDifferentHash>(data, size, init);
test<Map, Hashes::TwoRoundsTwoVarsHash>(data, size, init);
test<Map, Hashes::TwoRoundsLessOpsHash>(data, size, init);
test<Map, Hashes::CRC32Hash>(data, size, init);
test<Map, Hashes::MulShiftHash>(data, size, init);
test<Map, Hashes::TabulationHash>(data, size, init);
test<Map, Hashes::CityHash>(data, size, init);
test<Map, Hashes::SipHash>(data, size, init);
}
static void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
{
auto nothing = [](auto &){};
testForEachHash<HashMap>(data, size, nothing);
testForEachHash<std::unordered_map>(data, size, nothing);
testForEachHash<::google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
testForEachHash<::google::sparse_hash_map>(data, size, nothing);
testForEachHash<::absl::flat_hash_map>(data, size, nothing);
}
int main(int argc, char ** argv)
{
if (argc < 2)
void SetUp(const ::benchmark::State & state) override
{
std::cerr << "Usage: program n\n";
return 1;
pcg64_fast rng(randomSeed());
std::normal_distribution<double> dist(0, 10);
const size_t elements = state.range(0);
data.resize(elements);
for (auto & elem : data)
elem = static_cast<Key>(dist(rng)) % elements;
}
size_t n = std::stol(argv[1]);
// size_t m = std::stol(argv[2]);
std::cerr << std::fixed << std::setprecision(3);
std::vector<Key> data(n);
std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
void test(benchmark::State & st)
{
Stopwatch watch;
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);
watch.stop();
std::cerr
<< "Vector. Size: " << n
<< ", elapsed: " << watch.elapsedSeconds()
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
<< std::endl;
for (auto _ : st)
::test<HashMap, Hash>(data.data(), data.size());
}
/** Actually we should not run multiple test within same invocation of binary,
* because order of test could alter test results (due to state of allocator and various minor reasons),
* but in this case it's Ok.
*/
std::vector<Key> data;
};
testForEachMapAndHash(data.data(), data.size());
return 0;
}
#define OK_GOOGLE(Fixture, Map, Hash, N) \
BENCHMARK_TEMPLATE_DEFINE_F(Fixture, Test##Map##Hash, Map, Hashes::Hash)(benchmark::State & st) \
{ \
test(st); \
} \
BENCHMARK_REGISTER_F(Fixture, Test##Map##Hash)->Arg(N);
constexpr size_t elements_to_insert = 10'000'000;
/// tldr: crc32 has almost the same speed as identity hash if the corresponding intrinsics are available
/// todo: extend benchmark with larger key sizes up to say 24 bytes
OK_GOOGLE(TestRndInput, HashMap, ArcadiaNumericHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, CRC32Hash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, CityHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, FarmHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, IdentityHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, IntHash32, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MixAllBitsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MixMultiplyMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MulShiftHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MultiplyAndMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MurMurButDifferentHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MurMurMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, SimpleMultiplyHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, SipHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TabulationHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsLessOpsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsTwoVarsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, WyHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, XXH3Hash, elements_to_insert)
#ifdef __clang__
# pragma clang diagnostic pop
#endif
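A runnable, stripped-down version of the fixture/macro pattern above (a hedged sketch: only standard C++ and the google/benchmark API are assumed; build with ENABLE_BENCHMARKS=1 as wired up in the CMake changes in this commit, and filter cases at run time with --benchmark_filter=...):
#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

template <typename Hash>
struct SketchFixture : public benchmark::Fixture
{
    void SetUp(const ::benchmark::State & state) override
    {
        data.assign(state.range(0), 42); /// deterministic stand-in for the RNG setup above
    }
    std::vector<std::uint64_t> data;
};

BENCHMARK_TEMPLATE_DEFINE_F(SketchFixture, StdHash, std::hash<std::uint64_t>)(benchmark::State & st)
{
    for (auto _ : st)
    {
        std::size_t sum = 0;
        for (auto x : data)
            sum += std::hash<std::uint64_t>{}(x); /// the hash under test
        benchmark::DoNotOptimize(sum);
    }
}
BENCHMARK_REGISTER_F(SketchFixture, StdHash)->Arg(1'000'000);

BENCHMARK_MAIN();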

View File

@ -40,9 +40,6 @@ target_link_libraries (array_cache PRIVATE clickhouse_common_io)
clickhouse_add_executable (space_saving space_saving.cpp)
target_link_libraries (space_saving PRIVATE clickhouse_common_io)
clickhouse_add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)

View File

@ -58,9 +58,13 @@ inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table)
value = uint_;
/// Use little endian
for (size_t i = 0; i < sizeof(TUInt); ++i)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
{
if constexpr (std::endian::native == std::endian::little)
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[sizeof(TUInt) - 1 - i]) * 2], 2);
else
memcpy(out + i * 2, &table[static_cast<size_t>(uint8[i]) * 2], 2);
}
}
template <typename TUInt>
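The branch on std::endian::native (C++20 <bit>) keeps hex output most-significant-byte-first on any host. A self-contained hedged sketch of the same idea:
#include <bit>
#include <cstddef>
#include <cstdint>
#include <cstring>

/// Minimal sketch: render a 4-byte integer as big-endian hex regardless of host order.
void writeHexSketch(std::uint32_t value, char * out /* 8 chars, not null-terminated */)
{
    static constexpr char table[] = "0123456789ABCDEF";
    unsigned char bytes[sizeof(value)];
    std::memcpy(bytes, &value, sizeof(value));
    for (std::size_t i = 0; i < sizeof(value); ++i)
    {
        /// Take bytes from the end on little-endian hosts, from the start on big-endian ones.
        unsigned char b = (std::endian::native == std::endian::little)
            ? bytes[sizeof(value) - 1 - i]
            : bytes[i];
        out[i * 2] = table[b >> 4];
        out[i * 2 + 1] = table[b & 0x0F];
    }
}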

View File

@ -62,7 +62,7 @@ GTEST_TEST(WideInteger, Conversions)
zero += minus_one;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&zero, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(zero)));
ASSERT_EQ(0, memcmp(&zero, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE", sizeof(zero)));
#else
ASSERT_EQ(0, memcmp(&zero, "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(zero)));
#endif
@ -160,7 +160,7 @@ GTEST_TEST(WideInteger, Arithmetic)
zero += minus_one;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&zero, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(zero)));
ASSERT_EQ(0, memcmp(&zero, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE", sizeof(zero)));
#else
ASSERT_EQ(0, memcmp(&zero, "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(zero)));
#endif
@ -244,7 +244,7 @@ GTEST_TEST(WideInteger, Shift)
auto y = x << 64;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", sizeof(Int128)));
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00", sizeof(Int128)));
#else
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00", sizeof(Int128)));
#endif
@ -261,7 +261,7 @@ GTEST_TEST(WideInteger, Shift)
y = x << 16;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00", sizeof(Int128)));
#else
ASSERT_EQ(0, memcmp(&y, "\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
#endif
@ -269,18 +269,21 @@ GTEST_TEST(WideInteger, Shift)
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
y <<= 64;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00\x00\x00\x00\x00", sizeof(Int128)));
#else
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
#endif
y >>= 32;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x00\x00\x00\x00", sizeof(Int128)));
#else
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", sizeof(Int128)));
#endif
y <<= 64;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF\xFF\xFF\x00\x00\x00\x00", sizeof(Int128)));
ASSERT_EQ(0, memcmp(&y, "\xFF\xFF\xFF\xFF\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", sizeof(Int128)));
#else
ASSERT_EQ(0, memcmp(&y, "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xFF\xFF\xFF\xFF", sizeof(Int128)));
#endif
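The corrected expectations follow from the limb layout of wide integers: the value is an array of 64-bit limbs, and on big-endian hosts both the limb order and the bytes inside each limb are most-significant-first, making the 16-byte image the exact byte reverse of the little-endian one. A worked example, hedged:
/// Int128 y = Int128(1) << 64;   // numeric value 2^64
/// little-endian image: 00 00 00 00 00 00 00 00  01 00 00 00 00 00 00 00
/// big-endian image:    00 00 00 00 00 00 00 01  00 00 00 00 00 00 00 00
/// i.e. on big-endian the high limb (holding the 1) comes first and is itself
/// stored most-significant-byte-first.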

View File

@ -44,10 +44,10 @@ DeflateQplJobHWPool::DeflateQplJobHWPool()
for (UInt32 index = 0; index < MAX_HW_JOB_NUMBER; ++index)
{
qpl_job * qpl_job_ptr = reinterpret_cast<qpl_job *>(hw_jobs_buffer.get() + index * job_size);
if (qpl_init_job(qpl_path_hardware, qpl_job_ptr) != QPL_STS_OK)
if (auto status = qpl_init_job(qpl_path_hardware, qpl_job_ptr); status != QPL_STS_OK)
{
job_pool_ready = false;
LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed, falling back to software DeflateQpl codec. Please check if Intel In-Memory Analytics Accelerator (IAA) is properly set up. QPL Version: {}.",qpl_version);
LOG_WARNING(log, "Initialization of hardware-assisted DeflateQpl codec failed: {} , falling back to software DeflateQpl codec. Please check if Intel In-Memory Analytics Accelerator (IAA) is properly set up. QPL Version: {}.", static_cast<UInt32>(status), qpl_version);
return;
}
hw_job_ptr_pool[index] = qpl_job_ptr;
@ -165,7 +165,7 @@ Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source
}
else
{
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
DeflateQplJobHWPool::instance().releaseJob(job_id);
return RET_ERROR;
}
@ -193,7 +193,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source,
if (auto status = qpl_submit_job(job_ptr); status != QPL_STS_OK)
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
/// Busy waiting till job complete.
@ -233,7 +233,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
else
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
}
@ -289,7 +289,7 @@ qpl_job * SoftwareCodecDeflateQpl::getJobCodecPtr()
// Job initialization
if (auto status = qpl_init_job(qpl_path_software, sw_job); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_COMPRESS,
"Initialization of DeflateQpl software fallback codec failed. (Details: qpl_init_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
"Initialization of DeflateQpl software fallback codec failed. (Details: qpl_init_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
}
return sw_job;
}
@ -308,7 +308,7 @@ UInt32 SoftwareCodecDeflateQpl::doCompressData(const char * source, UInt32 sourc
if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_COMPRESS,
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return job_ptr->total_out;
}
@ -327,7 +327,7 @@ void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 sourc
if (auto status = qpl_execute_job(job_ptr); status != QPL_STS_OK)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS,
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", status);
"Execution of DeflateQpl software fallback codec failed. (Details: qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
}
CompressionCodecDeflateQpl::CompressionCodecDeflateQpl()
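The added static_cast<UInt32>(status) calls make the intended integer representation of the qpl_status code explicit in the log output, keeping the message well-defined regardless of how the formatting layer treats enum arguments (hedged reasoning; the names below are hypothetical):
#include <cstdio>

/// Sketch of the pattern:
enum sketch_status { SKETCH_OK = 0, SKETCH_MORE_OUTPUT_NEEDED = 1 };

void logFailure(sketch_status status)
{
    /// Cast so the underlying numeric code is printed, independent of how the
    /// logging/format library handles enum arguments.
    std::printf("job failed with status %u\n", static_cast<unsigned>(status));
}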

View File

@ -445,7 +445,7 @@ UInt8 getDataBytesSize(const IDataType * column_type)
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
return static_cast<UInt8>(max_size);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}",
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec DoubleDelta is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}",
column_type->getName());
}

View File

@ -344,7 +344,7 @@ UInt8 getDataBytesSize(const IDataType * column_type)
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
return static_cast<UInt8>(max_size);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Delta is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}",
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Gorilla is only applicable for data types of size 1, 2, 4, 8 bytes. Given type {}",
column_type->getName());
}

View File

@ -325,7 +325,7 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
{
auto asio_listener = asio_service->create_rpc_listener(state_manager->getPort(), logger, enable_ipv6);
if (!asio_listener)
return;
throw Exception(ErrorCodes::RAFT_ERROR, "Cannot create interserver listener on port {}", state_manager->getPort());
asio_listeners.emplace_back(std::move(asio_listener));
}
else
@ -924,14 +924,22 @@ KeeperLogInfo KeeperServer::getKeeperLogInfo()
{
KeeperLogInfo log_info;
auto log_store = state_manager->load_log_store();
log_info.first_log_idx = log_store->start_index();
log_info.first_log_term = log_store->term_at(log_info.first_log_idx);
log_info.last_log_idx = raft_instance->get_last_log_idx();
log_info.last_log_term = raft_instance->get_last_log_term();
log_info.last_committed_log_idx = raft_instance->get_committed_log_idx();
log_info.leader_committed_log_idx = raft_instance->get_leader_committed_log_idx();
log_info.target_committed_log_idx = raft_instance->get_target_committed_log_idx();
log_info.last_snapshot_idx = raft_instance->get_last_snapshot_idx();
if (log_store)
{
log_info.first_log_idx = log_store->start_index();
log_info.first_log_term = log_store->term_at(log_info.first_log_idx);
}
if (raft_instance)
{
log_info.last_log_idx = raft_instance->get_last_log_idx();
log_info.last_log_term = raft_instance->get_last_log_term();
log_info.last_committed_log_idx = raft_instance->get_committed_log_idx();
log_info.leader_committed_log_idx = raft_instance->get_leader_committed_log_idx();
log_info.target_committed_log_idx = raft_instance->get_target_committed_log_idx();
log_info.last_snapshot_idx = raft_instance->get_last_snapshot_idx();
}
return log_info;
}

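Both guards above matter because state_manager->load_log_store() and raft_instance can legitimately be null while Keeper is still bootstrapping or already shutting down, so a monitoring request must not dereference them unconditionally; with the checks in place, the zero-initialized KeeperLogInfo fields are returned instead. A minimal sketch of the same shape, with hypothetical stand-in types:

#include <cstdint>
#include <memory>

struct FakeLogStore { std::uint64_t start_index() const { return 1; } };
struct FakeLogInfo { std::uint64_t first_log_idx = 0; };  /// zero-initialized defaults survive a null source

FakeLogInfo collectLogInfo(const std::shared_ptr<FakeLogStore> & log_store)
{
    FakeLogInfo info;
    if (log_store)  /// may be null during startup or shutdown
        info.first_log_idx = log_store->start_index();
    return info;
}

int main()
{
    return static_cast<int>(collectLogInfo(nullptr).first_log_idx);  /// 0, no crash
}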
View File

@ -546,7 +546,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
\
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
M(UInt64, max_size_to_preallocate_for_aggregation, 10'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
\
M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \
M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \
@ -583,6 +583,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
M(Bool, query_plan_read_in_order, true, "Use query plan for read-in-order optimisation", 0) \
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \

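The M(type, name, default, description, flags) rows above belong to an X-macro list: one table of settings that expands into declarations, default values, or documentation depending on how M is defined at each expansion site. A minimal sketch of the pattern; the two M rows are borrowed from the diff, while FakeSettings and the helper macros are hypothetical:

#include <cstdint>
#include <iostream>

#define APPLY_FOR_FAKE_SETTINGS(M) \
    M(std::uint64_t, max_size_to_preallocate_for_aggregation, 100'000'000, "Preallocation budget for hash tables", 0) \
    M(bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0)

// One possible expansion: declare each setting as a member with its default.
#define DECLARE_SETTING(TYPE, NAME, DEFAULT, DESCRIPTION, FLAGS) TYPE NAME = DEFAULT;
struct FakeSettings { APPLY_FOR_FAKE_SETTINGS(DECLARE_SETTING) };
#undef DECLARE_SETTING

int main()
{
    FakeSettings settings;
    std::cout << settings.max_size_to_preallocate_for_aggregation << '\n';  // 100000000
}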
View File

@ -51,13 +51,13 @@ struct SortColumnDescription
SortColumnDescription() = default;
explicit SortColumnDescription(
const std::string & column_name_,
std::string column_name_,
int direction_ = 1,
int nulls_direction_ = 1,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false,
const FillColumnDescription & fill_description_ = {})
: column_name(column_name_)
: column_name(std::move(column_name_))
, direction(direction_)
, nulls_direction(nulls_direction_)
, collator(collator_)

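Switching column_name_ from const std::string & to pass-by-value with std::move is the standard sink-parameter idiom for a value that is stored in a member: a caller passing an rvalue now pays one move instead of a copy, while an lvalue caller pays the same single copy as before. A minimal sketch:

#include <string>
#include <utility>

struct Desc
{
    std::string column_name;

    /// Sink parameter: taken by value, then moved into the member.
    explicit Desc(std::string column_name_) : column_name(std::move(column_name_)) {}
};

int main()
{
    std::string name = "hits";
    Desc by_copy(name);             // one copy into the parameter, one move into the member
    Desc by_move(std::move(name));  // two moves, no copy
    return by_copy.column_name == by_move.column_name ? 0 : 1;
}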
View File

@ -34,7 +34,6 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
/// Some functions may return different values on different shards/replicas, so it's not constant for a distributed query
bool isSuitableForConstantFolding() const override { return !is_distributed; }

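The flag combination above encodes a subtlety of distributed execution: a function like hostName() is non-deterministic across servers yet constant within one query on one server, and folding it into a literal on the initiator would ship the initiator's value to every remote shard, so folding is allowed only for non-distributed queries. A minimal sketch (hypothetical names, IFunction plumbing omitted):

struct FakeHostNameFunction
{
    bool is_distributed = false;  /// set when the query is part of a distributed one

    bool isDeterministic() const { return false; }               /// differs between servers
    bool isDeterministicInScopeOfQuery() const { return true; }  /// stable within one query
    /// Constant-fold only when the result cannot leak to other shards/replicas.
    bool isSuitableForConstantFolding() const { return !is_distributed; }
};

int main()
{
    FakeHostNameFunction f;
    return f.isSuitableForConstantFolding() ? 0 : 1;
}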
View File

@ -65,13 +65,27 @@ struct HexImpl
}
}
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out, bool reverse_order = false)
{
while (pos < end)
if (!reverse_order)
{
writeHexByteUppercase(*pos, out);
++pos;
out += word_size;
while (pos < end)
{
writeHexByteUppercase(*pos, out);
++pos;
out += word_size;
}
}
else
{
const auto * start_pos = pos;
pos = end - 1;
while (pos >= start_pos)
{
writeHexByteUppercase(*pos, out);
--pos;
out += word_size;
}
}
*out = '\0';
++out;
@ -95,7 +109,8 @@ struct HexImpl
for (size_t i = 0; i < size; ++i)
{
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
executeOneString(in_pos, in_pos + type_size_in_bytes, out);
bool reverse_order = (std::endian::native == std::endian::big);
executeOneString(in_pos, in_pos + type_size_in_bytes, out, reverse_order);
pos += hex_length;
out_offsets[i] = pos;
@ -174,7 +189,9 @@ struct BinImpl
for (size_t i = 0; i < size; ++i)
{
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
executeOneString(in_pos, in_pos + type_size_in_bytes, out);
bool reverse_order = (std::endian::native == std::endian::big);
executeOneString(in_pos, in_pos + type_size_in_bytes, out, reverse_order);
pos += hex_length;
out_offsets[i] = pos;
@ -182,13 +199,27 @@ struct BinImpl
col_res = std::move(col_str);
}
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out, bool reverse_order = false)
{
while (pos < end)
if (!reverse_order)
{
writeBinByte(*pos, out);
++pos;
out += word_size;
while (pos < end)
{
writeBinByte(*pos, out);
++pos;
out += word_size;
}
}
else
{
const auto * start_pos = pos;
pos = end - 1;
while (pos >= start_pos)
{
writeBinByte(*pos, out);
--pos;
out += word_size;
}
}
*out = '\0';
++out;

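The reverse_order flag above exists because executeOneString dumps the raw byte image of a fixed-size value: a little-endian host emits the least significant byte first, and a big-endian host must walk the buffer backwards to produce the same text. A standalone sketch of the rule for a 4-byte value:

#include <bit>
#include <cstdint>
#include <cstdio>

// Prints the value's little-endian byte image regardless of host endianness.
void hexLittleEndianImage(std::uint32_t value)
{
    const auto * bytes = reinterpret_cast<const unsigned char *>(&value);
    if constexpr (std::endian::native == std::endian::little)
        for (int i = 0; i < 4; ++i) std::printf("%02X", bytes[i]);
    else
        for (int i = 3; i >= 0; --i) std::printf("%02X", bytes[i]);  /// reverse order
    std::printf("\n");
}

int main()
{
    hexLittleEndianImage(0x0000012C);  /// prints 2C010000 on both endiannesses
}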
View File

@ -2670,8 +2670,6 @@ public:
String getName() const override { return cast_name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool hasInformationAboutMonotonicity() const override

View File

@ -79,8 +79,6 @@ public:
String getName() const override { return "FunctionExpression"; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
const DataTypes & getArgumentTypes() const override { return argument_types; }
@ -176,8 +174,6 @@ public:
String getName() const override { return name; }
bool isDeterministic() const override { return true; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
const DataTypes & getArgumentTypes() const override { return capture->captured_types; }

View File

@ -1,82 +1,174 @@
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsStringSearchToString.h>
#include <base/find_symbols.h>
namespace DB
{
struct CutURLParameterImpl
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
}
class FunctionCutURLParameter : public IFunction
{
public:
static constexpr auto name = "cutURLParameter";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionCutURLParameter>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[0]->getName(), getName());
if (!isString(arguments[1]) && !isArray(arguments[1]))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of argument of function {}",
arguments[1]->getName(), getName());
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const ColumnPtr column = arguments[0].column;
const ColumnPtr column_needle = arguments[1].column;
const ColumnConst * col_needle = typeid_cast<const ColumnConst *>(&*column_needle);
const ColumnArray * col_needle_const_array = checkAndGetColumnConstData<ColumnArray>(column_needle.get());
if (!col_needle && !col_needle_const_array)
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Second argument of function {} must be constant string or constant array",
getName());
if (col_needle_const_array)
{
if (!col_needle_const_array->getData().empty() && typeid_cast<const DataTypeArray &>(*arguments[1].type).getNestedType()->getTypeId() != TypeIndex::String)
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Second argument of function {} must be constant array of strings",
getName());
}
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
{
auto col_res = ColumnString::create();
ColumnString::Chars & vec_res = col_res->getChars();
ColumnString::Offsets & offsets_res = col_res->getOffsets();
vector(col->getChars(), col->getOffsets(), col_needle, col_needle_const_array, vec_res, offsets_res);
return col_res;
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of argument of function {}",
arguments[0].column->getName(), getName());
}
static void cutURL(ColumnString::Chars & data, String pattern, size_t prev_offset, size_t & cur_offset)
{
pattern += '=';
const char * param_str = pattern.c_str();
size_t param_len = pattern.size();
const char * url_begin = reinterpret_cast<const char *>(&data[prev_offset]);
const char * url_end = reinterpret_cast<const char *>(&data[cur_offset - 2]);
const char * begin_pos = url_begin;
const char * end_pos = begin_pos;
do
{
const char * query_string_begin = find_first_symbols<'?', '#'>(url_begin, url_end);
if (query_string_begin + 1 >= url_end)
break;
const char * pos = static_cast<const char *>(memmem(query_string_begin + 1, url_end - query_string_begin - 1, param_str, param_len));
if (pos == nullptr)
break;
if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&')
{
pos = nullptr;
break;
}
begin_pos = pos;
end_pos = begin_pos + param_len;
/// Skip the value.
while (*end_pos && *end_pos != '&' && *end_pos != '#')
++end_pos;
/// Capture '&' before or after the parameter.
if (*end_pos == '&')
++end_pos;
else if (begin_pos[-1] == '&')
--begin_pos;
} while (false);
size_t cut_length = end_pos - begin_pos;
cur_offset -= cut_length;
data.erase(data.begin() + prev_offset + (begin_pos - url_begin), data.begin() + prev_offset + (end_pos - url_begin));
}
static void vector(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
std::string pattern,
const ColumnConst * col_needle,
const ColumnArray * col_needle_const_array,
ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
{
res_data.reserve(data.size());
res_offsets.resize(offsets.size());
pattern += '=';
const char * param_str = pattern.c_str();
size_t param_len = pattern.size();
size_t prev_offset = 0;
size_t cur_offset;
size_t cur_len;
size_t res_offset = 0;
size_t cur_res_offset;
for (size_t i = 0; i < offsets.size(); ++i)
{
size_t cur_offset = offsets[i];
cur_offset = offsets[i];
cur_len = cur_offset - prev_offset;
cur_res_offset = res_offset + cur_len;
res_data.resize(cur_res_offset);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset], cur_len);
const char * url_begin = reinterpret_cast<const char *>(&data[prev_offset]);
const char * url_end = reinterpret_cast<const char *>(&data[cur_offset]) - 1;
const char * begin_pos = url_begin;
const char * end_pos = begin_pos;
do
if (col_needle_const_array)
{
const char * query_string_begin = find_first_symbols<'?', '#'>(url_begin, url_end);
if (query_string_begin + 1 >= url_end)
break;
const char * pos = static_cast<const char *>(memmem(query_string_begin + 1, url_end - query_string_begin - 1, param_str, param_len));
if (pos == nullptr)
break;
if (pos[-1] != '?' && pos[-1] != '#' && pos[-1] != '&')
size_t num_needles = col_needle_const_array->getData().size();
for (size_t j = 0; j < num_needles; ++j)
{
pos = nullptr;
break;
auto field = col_needle_const_array->getData()[j];
cutURL(res_data, field.get<String>(), res_offset, cur_res_offset);
}
begin_pos = pos;
end_pos = begin_pos + param_len;
/// Skip the value.
while (*end_pos && *end_pos != '&' && *end_pos != '#')
++end_pos;
/// Capture '&' before or after the parameter.
if (*end_pos == '&')
++end_pos;
else if (begin_pos[-1] == '&')
--begin_pos;
} while (false);
size_t cut_length = (url_end - url_begin) - (end_pos - begin_pos);
res_data.resize(res_offset + cut_length + 1);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], url_begin, begin_pos - url_begin);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset] + (begin_pos - url_begin), end_pos, url_end - end_pos);
res_offset += cut_length + 1;
res_data[res_offset - 1] = 0;
res_offsets[i] = res_offset;
}
else
{
cutURL(res_data, col_needle->getValue<String>(), res_offset, cur_res_offset);
}
res_offsets[i] = cur_res_offset;
res_offset = cur_res_offset;
prev_offset = cur_offset;
}
}
};
struct NameCutURLParameter { static constexpr auto name = "cutURLParameter"; };
using FunctionCutURLParameter = FunctionsStringSearchToString<CutURLParameterImpl, NameCutURLParameter>;
REGISTER_FUNCTION(CutURLParameter)
{
factory.registerFunction<FunctionCutURLParameter>();

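The rewrite pulls the removal of a single name=value pair into cutURL() so it can run once per needle when the second argument is a constant array of strings. A simplified std::string sketch of one cut (the production code performs the same steps in place over ColumnString chunks):

#include <iostream>
#include <string>

std::string cutUrlParameter(std::string url, const std::string & name)
{
    const std::string pattern = name + '=';
    const size_t query_begin = url.find_first_of("?#");
    if (query_begin == std::string::npos)
        return url;
    const size_t pos = url.find(pattern, query_begin + 1);
    if (pos == std::string::npos || (url[pos - 1] != '?' && url[pos - 1] != '#' && url[pos - 1] != '&'))
        return url;
    size_t begin = pos;
    size_t end = url.find_first_of("&#", pos);  /// skip the value
    if (end == std::string::npos)
        end = url.size();
    if (end < url.size() && url[end] == '&')
        ++end;              /// capture the '&' after the parameter...
    else if (url[begin - 1] == '&')
        --begin;            /// ...or the one before it
    url.erase(begin, end - begin);
    return url;
}

int main()
{
    std::cout << cutUrlParameter("http://example.com/?a=1&b=2&c=3", "b") << '\n';  // http://example.com/?a=1&c=3
}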
View File

@ -59,7 +59,6 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
private:

View File

@ -38,7 +38,10 @@ public:
}
/** It could return many different values for a single argument. */
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -26,7 +26,10 @@ public:
return name;
}
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -50,11 +50,6 @@ public:
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool isDeterministicInScopeOfQuery() const override
{
return true;
}
/// getMacro may return different values on different shards/replicas, so it's not constant for a distributed query
bool isSuitableForConstantFolding() const override { return !is_distributed; }

View File

@ -105,11 +105,6 @@ public:
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override
{
return true;
}
bool isSuitableForConstantFolding() const override { return !is_distributed; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

View File

@ -56,7 +56,6 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForConstantFolding() const override { return !is_distributed; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
@ -89,7 +88,6 @@ public:
size_t getNumberOfArguments() const override { return 1; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; }
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & data_types) const override
{

View File

@ -62,9 +62,15 @@ public:
return std::make_unique<ExecutableFunctionNow>(time_value);
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override
{
return false;
}
private:
time_t time_value;

View File

@ -87,9 +87,15 @@ public:
return std::make_unique<ExecutableFunctionNow64>(time_value);
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override
{
return false;
}
private:
Field time_value;

View File

@ -59,8 +59,10 @@ public:
return std::make_unique<ExecutableFunctionRandomConstant<ToType, Name>>(value);
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isDeterministic() const override
{
return false;
}
private:
ToType value;

View File

@ -39,7 +39,10 @@ public:
return 0;
}
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -34,7 +34,10 @@ public:
return 0;
}
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -52,7 +52,10 @@ public:
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{

View File

@ -147,7 +147,11 @@ public:
return 1;
}
bool isDeterministic() const override { return false; }
bool isDeterministic() const override
{
return false;
}
bool isDeterministicInScopeOfQuery() const override
{
return false;

View File

@ -53,7 +53,6 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
private:

View File

@ -51,7 +51,6 @@ public:
}
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
private:

View File

@ -334,7 +334,7 @@ std::string HTTPException::makeExceptionMessage(
"Received error from remote server {}. "
"HTTP status code: {} {}, "
"body: {}",
uri, http_status, reason, body);
uri, static_cast<int>(http_status), reason, body);
}
}

View File

@ -1078,7 +1078,7 @@ inline void readBinaryBigEndian(T & x, ReadBuffer & buf) /// Assuming little
{
for (size_t i = 0; i != std::size(x.items); ++i)
{
auto & item = x.items[std::size(x.items) - i - 1];
auto & item = x.items[(std::endian::native == std::endian::little) ? std::size(x.items) - i - 1 : i];
readBinaryBigEndian(item, buf);
}
}

View File

@ -1126,7 +1126,7 @@ inline void writeBinaryBigEndian(const T & x, WriteBuffer & buf) /// Assuming
{
for (size_t i = 0; i != std::size(x.items); ++i)
{
const auto & item = x.items[std::size(x.items) - i - 1];
const auto & item = x.items[(std::endian::native == std::endian::little) ? std::size(x.items) - i - 1 : i];
writeBinaryBigEndian(item, buf);
}
}

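Both hunks apply the same rule: UInt128/UInt256 values are stored as an items[] array of 64-bit limbs whose in-memory order follows host endianness, so big-endian (de)serialization must walk the array backwards only on little-endian hosts. A sketch of the write side, assuming a hypothetical two-limb value (the read side mirrors it):

#include <bit>
#include <cstddef>
#include <cstdint>
#include <iterator>

struct FakeUInt128 { std::uint64_t items[2]; };  /// items[0] is the low limb on little-endian hosts

template <typename WriteLimb>
void writeBigEndianLimbs(const FakeUInt128 & x, WriteLimb && write_limb)
{
    for (std::size_t i = 0; i != std::size(x.items); ++i)
    {
        /// Emit the most significant limb first: walk backwards on little-endian,
        /// forwards on big-endian where the limbs are already ordered.
        const auto & item = x.items[(std::endian::native == std::endian::little) ? std::size(x.items) - i - 1 : i];
        write_limb(item);  /// stands in for the single-integer writeBinaryBigEndian()
    }
}

int main()
{
    FakeUInt128 value{{0x0123456789ABCDEFULL, 0x1ULL}};
    writeBigEndianLimbs(value, [](std::uint64_t) { /* byte-swap and write one limb */ });
}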
View File

@ -221,16 +221,31 @@ void initDataVariantsWithSizeHint(
const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
const auto lower_limit = hint->sum_of_sizes / max_threads;
const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads;
const auto adjusted = std::min(std::max(lower_limit, hint->median_size), upper_limit);
if (worthConvertToTwoLevel(
params.group_by_two_level_threshold,
hint->sum_of_sizes,
/*group_by_two_level_threshold_bytes*/ 0,
/*result_size_bytes*/ 0))
method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
result.init(method_chosen, adjusted);
ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
return;
if (hint->median_size > upper_limit)
{
/// Since we cannot afford to preallocate as much as we want, we will likely need to do a resize anyway.
/// But we will also work with the big (i.e. not so cache-friendly) HT from the beginning, which may result in a slight slowdown.
/// So let's just do nothing.
LOG_TRACE(
&Poco::Logger::get("Aggregator"),
"No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, "
"should be at least {}",
stats_collecting_params.max_size_to_preallocate_for_aggregation,
hint->median_size * max_threads);
}
else
{
const auto adjusted = std::max(lower_limit, hint->median_size);
if (worthConvertToTwoLevel(
params.group_by_two_level_threshold,
hint->sum_of_sizes,
/*group_by_two_level_threshold_bytes*/ 0,
/*result_size_bytes*/ 0))
method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
result.init(method_chosen, adjusted);
ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
return;
}
}
}
result.init(method_chosen);
@ -488,7 +503,6 @@ Aggregator::AggregateColumnsConstData Aggregator::Params::makeAggregateColumnsDa
void Aggregator::Params::explain(WriteBuffer & out, size_t indent) const
{
Strings res;
String prefix(indent, ' ');
{
@ -931,7 +945,10 @@ void Aggregator::executeOnBlockSmall(
/// How to perform the aggregation?
if (result.empty())
{
initDataVariantsWithSizeHint(result, method_chosen, params);
if (method_chosen != AggregatedDataVariants::Type::without_key)
initDataVariantsWithSizeHint(result, method_chosen, params);
else
result.init(method_chosen);
result.keys_size = params.keys_size;
result.key_sizes = key_sizes;
}

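The new else-branch makes the preallocation hint conditional: if even the per-thread share of max_size_to_preallocate_for_aggregation is below the median table size previously observed for this query, a resize is unavoidable anyway and a large, cache-unfriendly initial table would only slow things down, so the hint is skipped and a trace message is logged. A sketch of just that decision, with the surrounding plumbing assumed:

#include <algorithm>
#include <cstdint>
#include <optional>

struct Hint { std::uint64_t sum_of_sizes; std::uint64_t median_size; };

/// Returns the number of elements to preallocate, or nullopt to skip preallocation.
std::optional<std::uint64_t> chooseInitialSize(const Hint & hint, std::uint64_t max_threads, std::uint64_t max_size_to_preallocate)
{
    const auto lower_limit = hint.sum_of_sizes / max_threads;        /// max_threads >= 1 by construction
    const auto upper_limit = max_size_to_preallocate / max_threads;
    if (hint.median_size > upper_limit)
        return std::nullopt;  /// budget too small: a resize is coming anyway, keep the table small and hot
    return std::max(lower_limit, hint.median_size);
}

int main()
{
    return chooseInitialSize({1'000'000, 10'000}, 8, 100'000'000).has_value() ? 0 : 1;
}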
View File

@ -16,31 +16,6 @@
#include <QueryPipeline/Pipe.h>
#include <Storages/SelectQueryInfo.h>
using namespace DB;
namespace
{
/// We determine output stream sort properties from a local plan (local because otherwise the table could be unknown).
/// If no local shard exists for this cluster, no sort properties will be provided, c'est la vie.
auto getRemoteShardsOutputStreamSortingProperties(const std::vector<QueryPlanPtr> & plans, ContextMutablePtr context)
{
SortDescription sort_description;
DataStream::SortScope sort_scope = DataStream::SortScope::None;
if (!plans.empty())
{
if (const auto * step = dynamic_cast<const ITransformingStep *>(plans.front()->getRootNode()->step.get());
step && step->getDataStreamTraits().can_enforce_sorting_properties_in_distributed_query)
{
step->adjustSettingsToEnforceSortingPropertiesInDistributedQuery(context);
sort_description = step->getOutputStream().sort_description;
sort_scope = step->getOutputStream().sort_scope;
}
}
return std::make_pair(sort_description, sort_scope);
}
}
namespace DB
{
@ -216,8 +191,6 @@ void executeQuery(
"_shard_count", Block{{DataTypeUInt32().createColumnConst(1, shards), std::make_shared<DataTypeUInt32>(), "_shard_count"}});
auto external_tables = context->getExternalTables();
auto && [sort_description, sort_scope] = getRemoteShardsOutputStreamSortingProperties(plans, new_context);
auto plan = std::make_unique<QueryPlan>();
auto read_from_remote = std::make_unique<ReadFromRemote>(
std::move(remote_shards),
@ -231,9 +204,7 @@ void executeQuery(
std::move(external_tables),
log,
shards,
query_info.storage_limits,
std::move(sort_description),
std::move(sort_scope));
query_info.storage_limits);
read_from_remote->setStepDescription("Read from remote replica");
plan->addStep(std::move(read_from_remote));
@ -329,7 +300,6 @@ void executeQueryWithParallelReplicas(
if (!remote_shards.empty())
{
auto new_context = Context::createCopy(context);
auto && [sort_description, sort_scope] = getRemoteShardsOutputStreamSortingProperties(plans, new_context);
for (const auto & shard : remote_shards)
{
@ -345,9 +315,7 @@ void executeQueryWithParallelReplicas(
scalars,
external_tables,
&Poco::Logger::get("ReadFromParallelRemoteReplicasStep"),
query_info.storage_limits,
sort_description,
sort_scope);
query_info.storage_limits);
auto remote_plan = std::make_unique<QueryPlan>();
remote_plan->addStep(std::move(read_from_remote));

View File

@ -1525,9 +1525,9 @@ void Context::setCurrentQueryId(const String & query_id)
client_info.initial_query_id = client_info.current_query_id;
}
void Context::killCurrentQuery()
void Context::killCurrentQuery() const
{
if (auto elem = process_list_elem.lock())
if (auto elem = getProcessListElement())
elem->cancelQuery(true);
}
@ -1782,11 +1782,16 @@ void Context::setProcessListElement(QueryStatusPtr elem)
{
/// Set to a session or query. In the session, only one query is processed at a time. Therefore, the lock is not needed.
process_list_elem = elem;
has_process_list_elem = elem.get();
}
QueryStatusPtr Context::getProcessListElement() const
{
return process_list_elem.lock();
if (!has_process_list_elem)
return {};
if (auto res = process_list_elem.lock())
return res;
throw Exception(ErrorCodes::LOGICAL_ERROR, "Weak pointer to process_list_elem expired during query execution, it's a bug");
}

View File

@ -239,6 +239,7 @@ private:
FileProgressCallback file_progress_callback; /// Callback for tracking progress of file loading.
std::weak_ptr<QueryStatus> process_list_elem; /// For tracking total resource usage for query.
bool has_process_list_elem = false; /// It's impossible to check if weak_ptr was initialized or not
StorageID insertion_table = StorageID::createEmpty(); /// Saved insertion table in query context
bool is_distributed = false; /// Whether the current context it used for distributed query
@ -629,7 +630,7 @@ public:
void setCurrentDatabaseNameInGlobalContext(const String & name);
void setCurrentQueryId(const String & query_id);
void killCurrentQuery();
void killCurrentQuery() const;
bool hasInsertionTable() const { return !insertion_table.empty(); }
void setInsertionTable(StorageID db_and_table) { insertion_table = std::move(db_and_table); }

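The added bool exists because a std::weak_ptr cannot distinguish "never assigned" from "assigned but expired": lock() returns null in both cases. Recording the assignment separately lets an expired pointer during query execution surface as LOGICAL_ERROR instead of silently looking like a missing element. A minimal sketch with hypothetical stand-in types:

#include <memory>
#include <stdexcept>

struct FakeQueryStatus {};

struct MiniContext
{
    std::weak_ptr<FakeQueryStatus> process_list_elem;
    bool has_process_list_elem = false;  /// weak_ptr alone can't tell "never set" from "expired"

    void setProcessListElement(std::shared_ptr<FakeQueryStatus> elem)
    {
        process_list_elem = elem;
        has_process_list_elem = elem.get();
    }

    std::shared_ptr<FakeQueryStatus> getProcessListElement() const
    {
        if (!has_process_list_elem)
            return {};  /// genuinely no element
        if (auto res = process_list_elem.lock())
            return res;
        throw std::logic_error("process_list_elem expired during query execution");  /// a bug, not a normal state
    }
};

int main()
{
    MiniContext ctx;
    return ctx.getProcessListElement() != nullptr;  /// 0: nothing was ever set
}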
Some files were not shown because too many files have changed in this diff