diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6f2fd5d678d..3d22cb984dd 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -887,6 +887,51 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAarch64V80Compat: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_aarch64_v80compat + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -972,6 +1017,7 @@ jobs: # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64SSE2 + - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 24a1c6bb714..2795dc62d6d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -940,6 +940,49 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinAarch64V80Compat: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_aarch64_v80compat + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" 
submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1025,6 +1068,7 @@ jobs: # - BuilderBinGCC - BuilderBinPPC64 - BuilderBinAmd64SSE2 + - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/CMakeLists.txt b/CMakeLists.txt index 64fb870b61b..c737046a5f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,7 +18,7 @@ include (cmake/target.cmake) include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) -include (cmake/git_status.cmake) +include (cmake/git.cmake) # Ignore export() since we don't use it, # but it gets broken with global targets via link_libraries() diff --git a/base/base/CachedFn.h b/base/base/CachedFn.h deleted file mode 100644 index 19b2a8ce2c0..00000000000 --- a/base/base/CachedFn.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include <map> -#include <mutex> -#include <tuple> -#include "FnTraits.h" - -/** - * Caching proxy for a functor that decays to a pointer-to-function. - * Saves pairs (func args, func result on args). - * Cache size is unlimited. Cache items are evicted only on manual drop. - * Invocation/update is O(log(saved cache values)). - * - * See Common/tests/cached_fn.cpp for examples. - */ -template <auto * Func> -struct CachedFn -{ -private: - using Traits = FnTraits<decltype(Func)>; - using DecayedArgs = TypeListMap<std::decay_t, typename Traits::Args>; - using Key = TypeListChangeRoot<std::tuple, DecayedArgs>; - using Result = typename Traits::Ret; - - std::map<Key, Result> cache; // Can't use hashmap as tuples are unhashable by default - mutable std::mutex mutex; - -public: - template <class ...Args> - Result operator()(Args && ...args) - { - Key key{std::forward<Args>(args)...}; - - { - std::lock_guard lock(mutex); - - if (auto it = cache.find(key); it != cache.end()) - return it->second; - } - - Result res = std::apply(Func, key); - - { - std::lock_guard lock(mutex); - cache.emplace(std::move(key), res); - } - - return res; - } - - template <class ...Args> - void update(Args && ...args) - { - Key key{std::forward<Args>(args)...}; - Result res = std::apply(Func, key); - - { - std::lock_guard lock(mutex); - // TODO Can't use emplace(std::move(key), ..), causes test_host_ip_change errors. 
- cache[key] = std::move(res); - } - } - - size_t size() const - { - std::lock_guard lock(mutex); - return cache.size(); - } - - void drop() - { - std::lock_guard lock(mutex); - cache.clear(); - } -}; diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 75c48f690f8..916d4f9a74d 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -13,8 +14,10 @@ #include #include #include +#include #include - +#include <boost/algorithm/string/split.hpp> +#include <boost/algorithm/string/classification.hpp> /// is_any_of namespace { @@ -35,6 +38,166 @@ std::string getEditor() return editor; } +std::string getFuzzyFinder() +{ + const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) + + if (!env_path || !*env_path) + return {}; + + std::vector<std::string> paths; + boost::split(paths, env_path, boost::is_any_of(":")); + for (const auto & path_str : paths) + { + std::filesystem::path path(path_str); + std::filesystem::path sk_bin_path = path / "sk"; + if (!access(sk_bin_path.c_str(), X_OK)) + return sk_bin_path; + + std::filesystem::path fzf_bin_path = path / "fzf"; + if (!access(fzf_bin_path.c_str(), X_OK)) + return fzf_bin_path; + } + + return {}; +} + +/// See comments in ShellCommand::executeImpl() +/// (for the vfork via dlsym()) +int executeCommand(char * const argv[]) +{ +#if !defined(USE_MUSL) + /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs, + * because of the resolving of symbols in the shared library + * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html + * Therefore, separate the resolving of the symbol from the call. + */ + static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); +#else + /// If we use Musl with static linking, there is no dlsym and no issue with vfork. + static void * real_vfork = reinterpret_cast<void *>(&vfork); +#endif + if (!real_vfork) + throw std::runtime_error("Cannot find vfork symbol"); + + pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)(); + + if (-1 == pid) + throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString())); + + /// Child + if (0 == pid) + { + sigset_t mask; + sigemptyset(&mask); + sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process + + execvp(argv[0], argv); + _exit(-1); + } + + int status = 0; + do + { + int exited_pid = waitpid(pid, &status, 0); + if (exited_pid != -1) + break; + + if (errno == EINTR) + continue; + + throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString())); + } while (true); + + return status; +} + +void writeRetry(int fd, const std::string & data) +{ + size_t bytes_written = 0; + const char * begin = data.c_str(); + size_t offset = data.size(); + + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString())); + bytes_written += res; + } +} +std::string readFile(const std::string & path) +{ + std::ifstream t(path); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>()); + return str; +} + +/// Simple wrapper for temporary files. 
+class TemporaryFile +{ +private: + std::string path; + int fd = -1; + +public: + explicit TemporaryFile(const char * pattern) + : path(pattern) + { + size_t dot_pos = path.rfind('.'); + if (dot_pos != std::string::npos) + fd = ::mkstemps(path.data(), path.size() - dot_pos); + else + fd = ::mkstemp(path.data()); + + if (-1 == fd) + throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString())); + } + ~TemporaryFile() + { + try + { + close(); + unlink(); + } + catch (const std::runtime_error & e) + { + fmt::print(stderr, "{}", e.what()); + } + } + + void close() + { + if (fd == -1) + return; + + if (0 != ::close(fd)) + throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString())); + fd = -1; + } + + void write(const std::string & data) + { + if (fd == -1) + throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path)); + + writeRetry(fd, data); + } + + void unlink() + { + if (0 != ::unlink(path.c_str())) + throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString())); + } + + std::string & getPath() { return path; } +}; + /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. /// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) /// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) @@ -142,6 +305,7 @@ ReplxxLineReader::ReplxxLineReader( replxx::Replxx::highlighter_callback_t highlighter_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_)) , editor(getEditor()) + , fuzzy_finder(getFuzzyFinder()) { using namespace std::placeholders; using Replxx = replxx::Replxx; @@ -249,6 +413,17 @@ ReplxxLineReader::ReplxxLineReader( return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); + + /// interactive search in history (requires fzf/sk) + if (!fuzzy_finder.empty()) + { + auto interactive_history_search = [this](char32_t code) + { + openInteractiveHistorySearch(); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + } } ReplxxLineReader::~ReplxxLineReader() @@ -293,116 +468,70 @@ void ReplxxLineReader::addToHistory(const String & line) rx.print("Unlock of history file failed: %s\n", errnoToString().c_str()); } -/// See comments in ShellCommand::executeImpl() -/// (for the vfork via dlsym()) -int ReplxxLineReader::executeEditor(const std::string & path) -{ - std::vector<char> argv0(editor.data(), editor.data() + editor.size() + 1); - std::vector<char> argv1(path.data(), path.data() + path.size() + 1); - char * const argv[] = {argv0.data(), argv1.data(), nullptr}; - - static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); - if (!real_vfork) - { - rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str()); - return -1; - } - - pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)(); - - if (-1 == pid) - { - rx.print("Cannot vfork: %s\n", errnoToString().c_str()); - return -1; - } - - /// Child - if (0 == pid) - { - sigset_t mask; - sigemptyset(&mask); - sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process - - execvp(editor.c_str(), argv); - rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str()); - _exit(-1); - } - - 
int status = 0; - do - { - int exited_pid = waitpid(pid, &status, 0); - if (exited_pid == -1) - { - if (errno == EINTR) - continue; - - rx.print("Cannot waitpid: %s\n", errnoToString().c_str()); - return -1; - } - else - break; - } while (true); - return status; -} - void ReplxxLineReader::openEditor() { - char filename[] = "clickhouse_replxx_XXXXXX.sql"; - int fd = ::mkstemps(filename, 4); - if (-1 == fd) - { - rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str()); - return; - } + TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql"); + editor_file.write(rx.get_state().text()); + editor_file.close(); - replxx::Replxx::State state(rx.get_state()); - - size_t bytes_written = 0; - const char * begin = state.text(); - size_t offset = strlen(state.text()); - while (bytes_written != offset) + char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr}; + try { - ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); - if ((-1 == res || 0 == res) && errno != EINTR) + if (executeCommand(argv) == 0) { - rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str()); - break; + const std::string & new_query = readFile(editor_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); } - bytes_written += res; } - - if (0 != ::close(fd)) + catch (const std::runtime_error & e) { - rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } - - if (0 == executeEditor(filename)) - { - try - { - std::ifstream t(filename); - std::string str; - t.seekg(0, std::ios::end); - str.reserve(t.tellg()); - t.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>()); - rx.set_state(replxx::Replxx::State(str.c_str(), str.size())); - } - catch (...) 
- { - rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str()); - return; - } + rx.print(e.what()); } if (bracketed_paste_enabled) enableBracketedPaste(); +} - if (0 != ::unlink(filename)) - rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str()); +void ReplxxLineReader::openInteractiveHistorySearch() +{ + assert(!fuzzy_finder.empty()); + TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); + auto hs(rx.history_scan()); + while (hs.next()) + { + history_file.write(hs.get().text()); + history_file.write(std::string(1, '\0')); + } + history_file.close(); + + TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); + output_file.close(); + + char sh[] = "sh"; + char sh_c[] = "-c"; + /// NOTE: You can use one of the following to configure the behaviour additionally: + /// - SKIM_DEFAULT_OPTIONS + /// - FZF_DEFAULT_OPTS + std::string fuzzy_finder_command = fmt::format( + "{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}", + fuzzy_finder, history_file.getPath(), output_file.getPath()); + char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; + + try + { + if (executeCommand(argv) == 0) + { + const std::string & new_query = readFile(output_file.getPath()); + rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size())); + } + } + catch (const std::runtime_error & e) + { + rx.print(e.what()); + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); } void ReplxxLineReader::enableBracketedPaste() diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index b9ec214d02c..fea1405a208 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -27,6 +27,7 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); + void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; @@ -36,4 +37,5 @@ private: bool bracketed_paste_enabled = false; std::string editor; + std::string fuzzy_finder; }; diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 218b4deedce..f9b2f103f49 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -11,49 +11,86 @@ cmake_push_check_state () # All of them are unrelated to the instruction set at the host machine # (you can compile for newer instruction set on old machines and vice versa). -option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) -option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) -option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) -option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) -option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) -option (ENABLE_AVX "Use AVX instructions on x86_64" 0) -option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) -option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) -option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) -option (ENABLE_BMI "Use BMI instructions on x86_64" 0) -option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) -option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) - -# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware. 
-option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0) -if (NO_SSE3_OR_HIGHER) - SET(ENABLE_SSSE3 0) - SET(ENABLE_SSE41 0) - SET(ENABLE_SSE42 0) - SET(ENABLE_PCLMULQDQ 0) - SET(ENABLE_POPCNT 0) - SET(ENABLE_AVX 0) - SET(ENABLE_AVX2 0) - SET(ENABLE_AVX512 0) - SET(ENABLE_AVX512_VBMI 0) - SET(ENABLE_BMI 0) - SET(ENABLE_AVX2_FOR_SPEC_OP 0) - SET(ENABLE_AVX512_FOR_SPEC_OP 0) -endif() - option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0) if (ARCH_NATIVE) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native") elseif (ARCH_AARCH64) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc+simd+crypto+dotprod+ssbs") + # ARM publishes almost every year a new revision of its ISA [1]. Each version comes with new mandatory and optional features from + # which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles", one for maximum + # compatibility intended to run on all 64-bit ARM hardware released after 2013 (e.g. Raspberry Pi 4), and one for modern ARM server + # CPUs (e.g. Graviton). + # + # [1] https://en.wikipedia.org/wiki/AArch64 + option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." 0) + + if (NO_ARMV81_OR_HIGHER) + # crc32 is optional in v8.0 and mandatory in v8.1. Enable it as __crc32()* is used in lots of places and even very old ARM CPUs + # support it. + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc") + else () + # ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it + # includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to the compat flag + # "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on + # old OSs. + # + # simd: NEON, introduced as optional in v8.0. A few extensions were added with v8.1 but it's still not mandatory. Enables the + # compiler to auto-vectorize. + # sve: Scalable Vector Extensions, introduced as optional in v8.2. Available in Graviton 3 but not in Graviton 2, and most likely + # also not in CI machines. Compiler support for autovectorization is rudimentary at the time of writing, see [5]. Can be + # enabled one-fine-day (TM) but not now. + # ssbs: "Speculative Store Bypass Safe". Optional in v8.0, mandatory in v8.5. Meltdown/spectre countermeasure. + # crypto: SHA1, SHA256, AES. Optional in v8.0. In v8.4, further algorithms were added but it's still optional, see [6]. + # dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag with doubtful performance benefits + # but it has always been enabled, so why not keep it. It's not 100% clear in which revision this flag was + # introduced as optional, either in v8.2 [7] or in v8.4 [8]. 
+ # + # [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md + # [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10 + # [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/ + # [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci + # [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support + # [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en + # [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html + # [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions- + set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs") + endif () elseif (ARCH_PPC64LE) # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") elseif (ARCH_AMD64) + option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1) + option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1) + option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1) + option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) + option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) + option (ENABLE_AVX "Use AVX instructions on x86_64" 0) + option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) + option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) + option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0) + option (ENABLE_BMI "Use BMI instructions on x86_64" 0) + option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) + option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) + + option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64 for maximum compatibility with older/embedded hardware." 0) + if (NO_SSE3_OR_HIGHER) + SET(ENABLE_SSSE3 0) + SET(ENABLE_SSE41 0) + SET(ENABLE_SSE42 0) + SET(ENABLE_PCLMULQDQ 0) + SET(ENABLE_POPCNT 0) + SET(ENABLE_AVX 0) + SET(ENABLE_AVX2 0) + SET(ENABLE_AVX512 0) + SET(ENABLE_AVX512_VBMI 0) + SET(ENABLE_BMI 0) + SET(ENABLE_AVX2_FOR_SPEC_OP 0) + SET(ENABLE_AVX512_FOR_SPEC_OP 0) + endif() + set (TEST_FLAG "-mssse3") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" diff --git a/cmake/git.cmake b/cmake/git.cmake new file mode 100644 index 00000000000..397ec3cd081 --- /dev/null +++ b/cmake/git.cmake @@ -0,0 +1,42 @@ +find_package(Git) + +# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for the SYSTEM.BUILD_OPTIONS view. 
+if (Git_FOUND) + # Commit hash + whether the building workspace was dirty or not + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_HASH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Branch name + execute_process(COMMAND + "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_BRANCH + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Date of the commit + SET(ENV{TZ} "UTC") + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_DATE + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + # Subject of the commit + execute_process(COMMAND + "${GIT_EXECUTABLE}" log -1 --format=%s + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GIT_COMMIT_SUBJECT + ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) + + message(STATUS "Git HEAD commit hash: ${GIT_HASH}") + + execute_process( + COMMAND ${GIT_EXECUTABLE} status + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + message(STATUS "Git could not be found.") +endif() + diff --git a/cmake/git_status.cmake b/cmake/git_status.cmake deleted file mode 100644 index c1047c0ccbf..00000000000 --- a/cmake/git_status.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Print the status of the git repository (if git is available). -# This is useful for troubleshooting build failure reports - -find_package(Git) - -if (Git_FOUND) - - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse HEAD - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} - OUTPUT_VARIABLE GIT_COMMIT_ID - OUTPUT_STRIP_TRAILING_WHITESPACE) - - message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}") - - execute_process( - COMMAND ${GIT_EXECUTABLE} status - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE) - -else() - message(STATUS "Git could not be found.") -endif() diff --git a/docker/packager/packager b/docker/packager/packager index 9da787e9006..b4aa4ebdd91 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -128,6 +128,7 @@ def parse_env_variables( DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" + ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" AMD64_SSE2_SUFFIX = "-amd64sse2" @@ -140,6 +141,7 @@ def parse_env_variables( is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) + is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX) @@ -178,6 +180,13 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" ) result.append("DEB_ARCH=arm64") + elif is_cross_arm_v80compat: + cc = compiler[: -len(ARM_V80COMPAT_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake" + ) + cmake_flags.append("-DNO_ARMV81_OR_HIGHER=1") + result.append("DEB_ARCH=arm64") elif is_cross_freebsd: cc = compiler[: -len(FREEBSD_SUFFIX)] cmake_flags.append( @@ -343,6 +352,7 @@ if __name__ == "__main__": "clang-15-darwin", "clang-15-darwin-aarch64", "clang-15-aarch64", + "clang-15-aarch64-v80compat", "clang-15-ppc64le", "clang-15-amd64sse2", "clang-15-freebsd", diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh 
index b1cfe42d28f..bf76fb20928 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -338,6 +338,12 @@ echo $previous_release_tag | download_release_packets && echo -e 'Download scrip || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log +for table in query_log trace_log +do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||: +done + +tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: # Check if we cloned previous release repository successfully if ! [ "$(ls -A previous_release_repository/tests/queries)" ] @@ -454,6 +460,7 @@ else -e "This engine is deprecated and is not supported in transactions" \ -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ + -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv @@ -496,6 +503,12 @@ else # Remove file bc_check_fatal_messages.txt if it's empty [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt + + tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: + for table in query_log trace_log + do + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||: + done fi dmesg -T > /test_output/dmesg.log @@ -505,17 +518,8 @@ grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv -tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: mv /var/log/clickhouse-server/stderr.log /test_output/ -# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts. -# (so that clickhouse-local --path can read it w/o extra care). 
-sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql -for table in query_log trace_log; do - sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql - tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||: -done - # Write check result into check_status.tsv clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index c2970924fb0..0fb5373a3ae 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -12,7 +12,16 @@ then DIR="amd64" elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then - DIR="aarch64" + # If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 + # compat build. Unfortunately, the ARM ISA level cannot be read directly; we have to guess from the "Features" line in /proc/cpuinfo. + # Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). + ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/') + if [ "${ARMV82}" ] + then + DIR="aarch64" + else + DIR="aarch64v80compat" + fi elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" @@ -22,12 +31,6 @@ then if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ] then DIR="freebsd" - elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] - then - DIR="freebsd-aarch64" - elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] - then - DIR="freebsd-powerpc64le" fi elif [ "${OS}" = "Darwin" ] then @@ -42,7 +45,7 @@ fi if [ -z "${DIR}" ] then - echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported." + echo "Operating system '${OS}' / architecture '${ARCH}' is unsupported." exit 1 fi diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index e31ce3de5ce..eaa9cdfde88 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -16,19 +16,21 @@ OpenCelliD Project is licensed under a Creative Commons Attribution-ShareAlike 4 1. Download the snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB). 2. Validate the integrity (optional step): -``` +```bash md5sum cell_towers.csv.xz +``` +```response 8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz ``` 3. Decompress it with the following command: -``` +```bash xz -d cell_towers.csv.xz ``` 4. Create a table: -``` +```sql CREATE TABLE cell_towers ( radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5), @@ -50,7 +52,7 @@ ENGINE = MergeTree ORDER BY (radio, mcc, net, created); ``` 5. 
Insert the dataset: -``` +```bash clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv ``` @@ -58,9 +60,10 @@ clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_t 1. A number of cell towers by type: -``` +```sql SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC - +``` +```response ┌─radio─┬────────c─┐ │ UMTS │ 20686487 │ │ LTE │ 12101148 │ @@ -74,9 +77,10 @@ SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC 2. Cell towers by [mobile country code (MCC)](https://en.wikipedia.org/wiki/Mobile_country_code): -``` +```sql SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10 - +``` +```response ┌─mcc─┬─count()─┐ │ 310 │ 5024650 │ │ 262 │ 2622423 │ @@ -104,21 +108,72 @@ Using `pointInPolygon` function. 1. Create a table where we will store polygons: -``` -CREATE TEMPORARY TABLE moscow (polygon Array(Tuple(Float64, Float64))); +```sql +CREATE TEMPORARY TABLE +moscow (polygon Array(Tuple(Float64, Float64))); ``` 2. This is a rough shape of Moscow (without "new Moscow"): -``` -INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), (37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), (37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), (37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), (37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), (37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), (37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), (37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), (37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), (37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), (37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 
55.64979413590619), (37.82801726983639, 55.64640836412121), (37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), (37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), (37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), (37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), (37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), (37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), (37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), (37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), (37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), (37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), (37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576), (37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), (37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), (37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), (37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 
55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), (37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), (37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), (37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), (37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), (37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), (37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), (37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), (37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), (37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), (37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), (37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), (37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), (37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), (37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), (37.52421672750851, 
55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), (37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), (37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), (37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), (37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), (37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), (37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), (37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), (37.84172564285271, 55.78000432402266)]); +```sql +INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266), +(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554), +(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413), +(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372), +(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784), +(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089), +(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608), +(37.83058993121339, 55.70041317726053), 
(37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335), +(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639), +(37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552), +(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121), +(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455), +(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279), +(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446), +(37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373), +(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915), +(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051), +(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785), +(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155), +(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229), +(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064), +(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), 
(37.46050283203121, 55.640794546982576), +(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014), +(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414), +(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686), +(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811), +(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614), +(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725), +(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266), +(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804), +(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979), +(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975), +(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751), +(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635), +(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249), +(37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802), +(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 
55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586), +(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106), +(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566), +(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865), +(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505), +(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554), +(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488), +(37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761), +(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134), +(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492), +(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685), +(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368), +(37.84172564285271, 55.78000432402266)]); ``` 3. Check how many cell towers are in Moscow: +```sql +SELECT count() FROM cell_towers +WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) ``` -SELECT count() FROM cell_towers WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow)) - +```response ┌─count()─┐ │ 310463 │ └─────────┘ diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..14c06ee0336 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -6,7 +6,7 @@ sidebar_label: VIEW # CREATE VIEW -Creates a new view. 
Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features).
+Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features).
 
 ## Normal View
diff --git a/docs/ru/engines/table-engines/special/external-data.md b/docs/ru/engines/table-engines/special/external-data.md
index 95ae1aa9059..b98039f768a 100644
--- a/docs/ru/engines/table-engines/special/external-data.md
+++ b/docs/ru/engines/table-engines/special/external-data.md
@@ -22,17 +22,17 @@ ClickHouse allows sending data to the server,
 There can be several such sections, one for each table being passed.
 
-**–external** - marks the beginning of a section.
-**–file** - path to the file with the table dump, or -, which means stdin.
-Only a single table can be read from stdin.
+- **--external** - marks the beginning of a section.
+- **--file** - path to the file with the table dump, or `-`, which means `stdin`.
+Only a single table can be read from `stdin`.
 
 The following parameters are optional:
-**–name** - name of the table. If omitted, _data is used.
-**–format** - data format in the file. If omitted, TabSeparated is used.
+- **--name** - name of the table. If omitted, _data is used.
+- **--format** - data format in the file. If omitted, TabSeparated is used.
 
 One of the following parameters must be specified:
-**–types** - comma-separated list of column types. For example, `UInt64,String`. The columns will be named _1, _2, …
-**–structure** - the table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
+- **--types** - comma-separated list of column types. For example, `UInt64,String`. The columns will be named _1, _2, …
+- **--structure** - the table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
 
 The files specified in file will be parsed using the format specified in format, with the data types specified in types or structure. The table will be uploaded to the server and will be available there as a temporary table with the name given in name.
diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt
index a5ad506abe6..ce176ccade5 100644
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke
 add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
 
 if (BUILD_STANDALONE_KEEPER)
-    # Sraight list of all required sources
+    # Straight list of all required sources
     set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp
@@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER)
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp
+        ${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp
 
         Keeper.cpp
         TinyContext.cpp
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 6d487a68111..fdfe0cef2b3 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -490,8 +490,9 @@ int Keeper::main(const std::vector & /*args*/)
 void Keeper::logRevision() const
 {
     Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING}
-        + " with revision " + std::to_string(ClickHouseRevision::getVersionRevision())
-        + ", " + build_id_info
+        + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision())
+        + ", git hash: " + (git_hash.empty() ? "" : git_hash)
+        + ", build id: " + (build_id.empty() ? "" : build_id) + ")"
        + ", PID " + std::to_string(getpid()));
 }
 
diff --git a/programs/server/config.xml b/programs/server/config.xml
index ab79e7a2e4c..dcb8ac0804c 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1114,10 +1114,6 @@
         <database>system</database>
         <table>asynchronous_metric_log</table>
-        <flush_interval_milliseconds>7000</flush_interval_milliseconds>
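For context on the Keeper change above: the build now stamps the git hash into a generated source file (GitHash.generated.cpp, produced by CMake's configure_file()), and the startup banner folds revision, git hash, and build id into one parenthesized group. A minimal sketch of the resulting composition, with all values as hypothetical stand-ins:

```cpp
#include <string>
#include <iostream>
#include <unistd.h>

// Stub for the CMake-generated GitHash.generated.cpp.
std::string getGitHash() { return "0123abc"; } // hypothetical hash

int main()
{
    std::string version = "22.9.1.1"; // stand-in for VERSION_STRING
    unsigned revision = 54466;        // stand-in for getVersionRevision()
    std::string git_hash = getGitHash();
    std::string build_id;             // empty when the binary carries no ELF build id

    // Empty fields print as empty strings, matching the new log format.
    std::cout << "Starting ClickHouse Keeper " << version
              << " (revision: " << revision
              << ", git hash: " << (git_hash.empty() ? "" : git_hash)
              << ", build id: " << (build_id.empty() ? "" : build_id) << ")"
              << ", PID " << getpid() << '\n';
}
```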
diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html
index e63a277497a..f013e3ac064 100644
--- a/programs/server/dashboard.html
+++ b/programs/server/dashboard.html
@@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) {
     sync.sub(plots[idx]);
 
     /// Set title
-    const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] );
+    const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
     chart.querySelector('.title').firstChild.data = title;
 }
 
diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp
index cb3f611fd4e..32f87d4d64a 100644
--- a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp
+++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp
@@ -58,7 +58,9 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes
     }
     else
     {
-        auto src_buf = createReadBufferFromFileBase(file_path, {}, fs::file_size(file_path));
+        ReadSettings read_settings{};
+        read_settings.local_fs_method = LocalFSReadMethod::pread;
+        auto src_buf = createReadBufferFromFileBase(file_path, read_settings, fs::file_size(file_path));
         std::shared_ptr<WriteBuffer> dst_buf;
 
         /// test mode for integration tests.
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 7d05cbb0681..f407fab68f1 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -70,7 +70,7 @@
 #include
 #include
 #include
-
+#include <Parsers/Kusto/ParserKQLStatement.h>
 
 namespace fs = std::filesystem;
 using namespace std::literals;
@@ -292,7 +292,7 @@ void ClientBase::setupSignalHandler()
 
 ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const
 {
-    ParserQuery parser(end, global_context->getSettings().allow_settings_after_format_in_insert);
+    std::unique_ptr<IParserBase> parser;
     ASTPtr res;
 
     const auto & settings = global_context->getSettingsRef();
@@ -301,10 +301,17 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
     if (!allow_multi_statements)
         max_length = settings.max_query_size;
 
+    const Dialect & dialect = settings.dialect;
+
+    if (dialect == Dialect::kusto)
+        parser = std::make_unique<ParserKQLStatement>(end, global_context->getSettings().allow_settings_after_format_in_insert);
+    else
+        parser = std::make_unique<ParserQuery>(end, global_context->getSettings().allow_settings_after_format_in_insert);
+
     if (is_interactive || ignore_error)
     {
         String message;
-        res = tryParseQuery(parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth);
 
         if (!res)
         {
@@ -314,7 +321,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu
     }
     else
     {
-        res = parseQueryAndMovePosition(parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
+        res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth);
     }
 
     if (is_interactive)
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index 278056130fd..3a6e623dc3f 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -198,6 +198,7 @@ protected:
     SharedContextHolder shared_context;
     ContextMutablePtr global_context;
 
+    /// thread status should be destructed before shared context because it relies on process list.
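An aside on the parseQuery() change above: the client now selects its parser from the `dialect` setting instead of constructing ParserQuery unconditionally. A minimal sketch of that dispatch shape (the interface and class names below are simplified stand-ins, not the real ClickHouse types):

```cpp
#include <memory>
#include <string>

enum class Dialect { clickhouse, kusto };

struct IParser
{
    virtual ~IParser() = default;
    virtual bool parse(const std::string & query) = 0;
};

struct ClickHouseParser : IParser { bool parse(const std::string &) override { return true; } };
struct KustoParser : IParser { bool parse(const std::string &) override { return true; } };

std::unique_ptr<IParser> makeParser(Dialect dialect)
{
    // One switch point; everything downstream works through the interface.
    if (dialect == Dialect::kusto)
        return std::make_unique<KustoParser>();
    return std::make_unique<ClickHouseParser>();
}
```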
std::optional thread_status; ServerConnectionPtr connection; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 7a663195655..a9795e75b28 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -179,6 +179,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { disconnect(); + /// Remove this possible stale entry from cache + DNSResolver::instance().removeHostFromCache(host); + /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. throw NetException(e.displayText() + " (" + getDescription() + ")", ErrorCodes::NETWORK_ERROR); } @@ -186,6 +189,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts) { disconnect(); + /// Remove this possible stale entry from cache + DNSResolver::instance().removeHostFromCache(host); + /// Add server address to exception. Also Exception will remember stack trace. It's a pity that more precise exception type is lost. /// This exception can only be thrown from socket->connect(), so add information about connection timeout. const auto & connection_timeout = static_cast(secure) ? timeouts.secure_connection_timeout : timeouts.connection_timeout; diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 74bcdfa1768..cb570c87498 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -12,14 +12,12 @@ #include #include #include -#include #include #include #include #include #include -#include #include #include @@ -27,10 +25,6 @@ # include #endif -#if USE_MULTITARGET_CODE -# include -#endif - #if USE_EMBEDDED_COMPILER #include #include @@ -477,128 +471,6 @@ void ColumnVector::insertRangeFrom(const IColumn & src, size_t start, size_t memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0])); } -static inline UInt64 blsr(UInt64 mask) -{ -#ifdef __BMI__ - return _blsr_u64(mask); -#else - return mask & (mask-1); -#endif -} - -DECLARE_DEFAULT_CODE( -template -inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) -{ - while (filt_pos < filt_end_aligned) - { - UInt64 mask = bytes64MaskToBits64Mask(filt_pos); - - if (0xffffffffffffffff == mask) - { - res_data.insert(data_pos, data_pos + SIMD_BYTES); - } - else - { - while (mask) - { - size_t index = std::countr_zero(mask); - res_data.push_back(data_pos[index]); - mask = blsr(mask); - } - } - - filt_pos += SIMD_BYTES; - data_pos += SIMD_BYTES; - } -} -) - -namespace -{ -template -void resize(Container & res_data, size_t reserve_size) -{ -#if defined(MEMORY_SANITIZER) - res_data.resize_fill(reserve_size, static_cast(0)); // MSan doesn't recognize that all allocated memory is written by AVX-512 intrinsics. 
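An aside on the filtering loops in this file, both the removed AVX-512 path and the scalar one: sparse masks are walked by repeatedly taking the lowest set bit's index with std::countr_zero and clearing that bit with BLSR, i.e. mask & (mask - 1). A self-contained sketch of the technique:

```cpp
#include <bit>
#include <cstdint>
#include <vector>

std::vector<size_t> setBitIndexes(uint64_t mask)
{
    std::vector<size_t> indexes;
    while (mask)
    {
        indexes.push_back(std::countr_zero(mask)); // index of the lowest set bit
        mask &= mask - 1;                          // clear the lowest set bit (BLSR)
    }
    return indexes;
}

// setBitIndexes(0b1011) yields {0, 1, 3}.
```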
-#else - res_data.resize(reserve_size); -#endif -} -} - -DECLARE_AVX512VBMI2_SPECIFIC_CODE( -template -inline void compressStoreAVX512(const void *src, void *dst, const UInt64 mask) -{ - __m512i vsrc = _mm512_loadu_si512(src); - if constexpr (ELEMENT_WIDTH == 1) - _mm512_mask_compressstoreu_epi8(dst, static_cast<__mmask64>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 2) - _mm512_mask_compressstoreu_epi16(dst, static_cast<__mmask32>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 4) - _mm512_mask_compressstoreu_epi32(dst, static_cast<__mmask16>(mask), vsrc); - else if constexpr (ELEMENT_WIDTH == 8) - _mm512_mask_compressstoreu_epi64(dst, static_cast<__mmask8>(mask), vsrc); -} - -template -inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data) -{ - static constexpr size_t VEC_LEN = 64; /// AVX512 vector length - 64 bytes - static constexpr size_t ELEMENT_WIDTH = sizeof(T); - static constexpr size_t ELEMENTS_PER_VEC = VEC_LEN / ELEMENT_WIDTH; - static constexpr UInt64 KMASK = 0xffffffffffffffff >> (64 - ELEMENTS_PER_VEC); - - size_t current_offset = res_data.size(); - size_t reserve_size = res_data.size(); - size_t alloc_size = SIMD_BYTES * 2; - - while (filt_pos < filt_end_aligned) - { - /// to avoid calling resize too frequently, resize to reserve buffer. - if (reserve_size - current_offset < SIMD_BYTES) - { - reserve_size += alloc_size; - resize(res_data, reserve_size); - alloc_size *= 2; - } - - UInt64 mask = bytes64MaskToBits64Mask(filt_pos); - - if (0xffffffffffffffff == mask) - { - for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC) - _mm512_storeu_si512(reinterpret_cast(&res_data[current_offset + i]), - _mm512_loadu_si512(reinterpret_cast(data_pos + i))); - current_offset += SIMD_BYTES; - } - else - { - if (mask) - { - for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC) - { - compressStoreAVX512(reinterpret_cast(data_pos + i), - reinterpret_cast(&res_data[current_offset]), mask & KMASK); - current_offset += std::popcount(mask & KMASK); - /// prepare mask for next iter, if ELEMENTS_PER_VEC = 64, no next iter - if (ELEMENTS_PER_VEC < 64) - { - mask >>= ELEMENTS_PER_VEC; - } - } - } - } - - filt_pos += SIMD_BYTES; - data_pos += SIMD_BYTES; - } - /// resize to the real size. 
- res_data.resize(current_offset); -} -) - template ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const { @@ -624,13 +496,31 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s static constexpr size_t SIMD_BYTES = 64; const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; -#if USE_MULTITARGET_CODE - static constexpr bool VBMI2_CAPABLE = sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8; - if (VBMI2_CAPABLE && isArchSupported(TargetArch::AVX512VBMI2)) - TargetSpecific::AVX512VBMI2::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); - else -#endif - TargetSpecific::Default::doFilterAligned(filt_pos, filt_end_aligned, data_pos, res_data); + while (filt_pos < filt_end_aligned) + { + UInt64 mask = bytes64MaskToBits64Mask(filt_pos); + + if (0xffffffffffffffff == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = std::countr_zero(mask); + res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } while (filt_pos < filt_end) { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index f967b2b4039..70a8a9bce4b 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -7,15 +7,11 @@ #include #include #include -#include #include #include #include "config_core.h" -#if USE_MULTITARGET_CODE -# include -#endif namespace DB { @@ -395,124 +391,6 @@ protected: Container data; }; -DECLARE_DEFAULT_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - for (size_t i = 0; i < limit; ++i) - res_data[i] = data[indexes[i]]; -} -); - -DECLARE_AVX512VBMI_SPECIFIC_CODE( -template -inline void vectorIndexImpl(const Container & data, const PaddedPODArray & indexes, size_t limit, Container & res_data) -{ - static constexpr UInt64 MASK64 = 0xffffffffffffffff; - const size_t limit64 = limit & ~63; - size_t pos = 0; - size_t data_size = data.size(); - - auto data_pos = reinterpret_cast(data.data()); - auto indexes_pos = reinterpret_cast(indexes.data()); - auto res_pos = reinterpret_cast(res_data.data()); - - if (data_size <= 64) - { - /// one single mask load for table size <= 64 - __mmask64 last_mask = MASK64 >> (64 - data_size); - __m512i table1 = _mm512_maskz_loadu_epi8(last_mask, data_pos); - - /// 64 bytes table lookup using one single permutexvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - /// tail handling - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutexvar_epi8(vidx, table1); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else if (data_size <= 128) - { - /// table size (64, 128] requires 2 zmm load - __mmask64 last_mask = MASK64 >> (128 - data_size); - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 64); - - /// 128 bytes table lookup using one single permute2xvar_epi8 - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i out = _mm512_permutex2var_epi8(table1, vidx, 
table2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i out = _mm512_permutex2var_epi8(table1, vidx, table2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } - else - { - if (data_size > 256) - { - /// byte index will not exceed 256 boundary. - data_size = 256; - } - - __m512i table1 = _mm512_loadu_epi8(data_pos); - __m512i table2 = _mm512_loadu_epi8(data_pos + 64); - __m512i table3, table4; - if (data_size <= 192) - { - /// only 3 tables need to load if size <= 192 - __mmask64 last_mask = MASK64 >> (192 - data_size); - table3 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 128); - table4 = _mm512_setzero_si512(); - } - else - { - __mmask64 last_mask = MASK64 >> (256 - data_size); - table3 = _mm512_loadu_epi8(data_pos + 128); - table4 = _mm512_maskz_loadu_epi8(last_mask, data_pos + 192); - } - - /// 256 bytes table lookup can use: 2 permute2xvar_epi8 plus 1 blender with MSB - while (pos < limit64) - { - __m512i vidx = _mm512_loadu_epi8(indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_storeu_epi8(res_pos + pos, out); - pos += 64; - } - if (limit > limit64) - { - __mmask64 tail_mask = MASK64 >> (limit64 + 64 - limit); - __m512i vidx = _mm512_maskz_loadu_epi8(tail_mask, indexes_pos + pos); - __m512i tmp1 = _mm512_permutex2var_epi8(table1, vidx, table2); - __m512i tmp2 = _mm512_permutex2var_epi8(table3, vidx, table4); - __mmask64 msb = _mm512_movepi8_mask(vidx); - __m512i out = _mm512_mask_blend_epi8(msb, tmp1, tmp2); - _mm512_mask_storeu_epi8(res_pos + pos, tail_mask, out); - } - } -} -); - template template ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_t limit) const @@ -521,18 +399,8 @@ ColumnPtr ColumnVector::indexImpl(const PaddedPODArray & indexes, size_ auto res = this->create(limit); typename Self::Container & res_data = res->getData(); -#if USE_MULTITARGET_CODE - if constexpr (sizeof(T) == 1 && sizeof(Type) == 1) - { - /// VBMI optimization only applicable for (U)Int8 types - if (isArchSupported(TargetArch::AVX512VBMI)) - { - TargetSpecific::AVX512VBMI::vectorIndexImpl(data, indexes, limit, res_data); - return res; - } - } -#endif - TargetSpecific::Default::vectorIndexImpl(data, indexes, limit, res_data); + for (size_t i = 0; i < limit; ++i) + res_data[i] = data[indexes[i]]; return res; } diff --git a/src/Columns/tests/gtest_column_vector.cpp b/src/Columns/tests/gtest_column_vector.cpp deleted file mode 100644 index 9dfb8c5aeb6..00000000000 --- a/src/Columns/tests/gtest_column_vector.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include -#include -#include -#include -#include -#include - -using namespace DB; - -static pcg64 rng(randomSeed()); -static constexpr int error_code = 12345; -static constexpr size_t TEST_RUNS = 500; -static constexpr size_t MAX_ROWS = 10000; -static const std::vector filter_ratios = {1, 2, 5, 11, 32, 64, 100, 1000}; -static const size_t K = filter_ratios.size(); - -template -static MutableColumnPtr createColumn(size_t n) -{ - auto column = ColumnVector::create(); - auto & values = column->getData(); - - for (size_t i = 0; i < n; ++i) - { - values.push_back(i); - } - - return column; -} - -bool checkFilter(const PaddedPODArray &flit, const 
IColumn & src, const IColumn & dst) -{ - size_t n = flit.size(); - size_t dst_size = dst.size(); - size_t j = 0; /// index of dest - for (size_t i = 0; i < n; ++i) - { - if (flit[i] != 0) - { - if ((dst_size <= j) || (src.compareAt(i, j, dst, 0) != 0)) - return false; - j++; - } - } - return dst_size == j; /// filtered size check -} - -template -static void testFilter() -{ - auto test_case = [&](size_t rows, size_t filter_ratio) - { - auto vector_column = createColumn(rows); - PaddedPODArray flit(rows); - for (size_t i = 0; i < rows; ++i) - flit[i] = rng() % filter_ratio == 0; - auto res_column = vector_column->filter(flit, -1); - - if (!checkFilter(flit, *vector_column, *res_column)) - throw Exception(error_code, "VectorColumn filter failure, type: {}", typeid(T).name()); - }; - - try - { - for (size_t i = 0; i < TEST_RUNS; ++i) - { - size_t rows = rng() % MAX_ROWS + 1; - size_t filter_ratio = filter_ratios[rng() % K]; - - test_case(rows, filter_ratio); - } - } - catch (const Exception & e) - { - FAIL() << e.displayText(); - } -} - -TEST(ColumnVector, Filter) -{ - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); - testFilter(); -} - -template -static MutableColumnPtr createIndexColumn(size_t limit, size_t rows) -{ - auto column = ColumnVector::create(); - auto & values = column->getData(); - auto max = std::numeric_limits::max(); - limit = limit > max ? max : limit; - - for (size_t i = 0; i < rows; ++i) - { - T val = rng() % limit; - values.push_back(val); - } - - return column; -} - -template -static void testIndex() -{ - static const std::vector column_sizes = {64, 128, 196, 256, 512}; - - auto test_case = [&](size_t rows, size_t index_rows, size_t limit) - { - auto vector_column = createColumn(rows); - auto index_column = createIndexColumn(rows, index_rows); - auto res_column = vector_column->index(*index_column, limit); - if (limit == 0) - limit = index_column->size(); - - /// check results - if (limit != res_column->size()) - throw Exception(error_code, "ColumnVector index size not match to limit: {} {}", typeid(T).name(), typeid(IndexType).name()); - for (size_t i = 0; i < limit; ++i) - { - /// vector_column data is the same as index, so indexed column's value will equals to index_column. - if (res_column->get64(i) != index_column->get64(i)) - throw Exception(error_code, "ColumnVector index fail: {} {}", typeid(T).name(), typeid(IndexType).name()); - } - }; - - try - { - for (size_t i = 0; i < TEST_RUNS; ++i) - { - /// make sure rows distribute in (column_sizes[r-1], colulmn_sizes[r]] - size_t row_idx = rng() % column_sizes.size(); - size_t row_base = row_idx > 0 ? 
column_sizes[row_idx - 1] : 0; - size_t rows = row_base + (rng() % (column_sizes[row_idx] - row_base) + 1); - size_t index_rows = rng() % MAX_ROWS + 1; - - test_case(rows, index_rows, 0); - test_case(rows, index_rows, static_cast(0.5 * index_rows)); - } - } - catch (const Exception & e) - { - FAIL() << e.displayText(); - } -} - -TEST(ColumnVector, Index) -{ - testIndex(); - testIndex(); - testIndex(); -} diff --git a/src/Common/CpuId.h b/src/Common/CpuId.h index 1e54ccf62b3..167fa22faf6 100644 --- a/src/Common/CpuId.h +++ b/src/Common/CpuId.h @@ -82,7 +82,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(AVX512BW) \ OP(AVX512VL) \ OP(AVX512VBMI) \ - OP(AVX512VBMI2) \ OP(PREFETCHWT1) \ OP(SHA) \ OP(ADX) \ @@ -303,11 +302,6 @@ bool haveAVX512VBMI() noexcept return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 1) & 1u); } -bool haveAVX512VBMI2() noexcept -{ - return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 6) & 1u); -} - bool haveRDRAND() noexcept { return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 67d87f757c7..1e5ec09f262 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -1,7 +1,8 @@ #include "DNSResolver.h" -#include +#include #include #include +#include #include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include "DNSPTRResolverProvider.h" namespace ProfileEvents @@ -41,9 +43,11 @@ namespace ErrorCodes extern const int DNS_ERROR; } +namespace +{ /// Slightly altered implementation from https://github.com/pocoproject/poco/blob/poco-1.6.1/Net/src/SocketAddress.cpp#L86 -static void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) +void splitHostAndPort(const std::string & host_and_port, std::string & out_host, UInt16 & out_port) { String port_str; out_host.clear(); @@ -84,7 +88,7 @@ static void splitHostAndPort(const std::string & host_and_port, std::string & ou throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); } -static DNSResolver::IPAddresses hostByName(const std::string & host) +DNSResolver::IPAddresses hostByName(const std::string & host) { /// Do not resolve IPv6 (or IPv4) if no local IPv6 (or IPv4) addresses are configured. 
/// It should not affect client address checking, since client cannot connect from IPv6 address @@ -112,7 +116,7 @@ static DNSResolver::IPAddresses hostByName(const std::string & host) return addresses; } -static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) +DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) { Poco::Net::IPAddress ip; @@ -136,7 +140,13 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -static std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) +DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) +{ + auto [result, _ ] = cache.getOrSet(host, [&host]() { return std::make_shared(resolveIPAddressImpl(host)); }); + return *result; +} + +std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -149,13 +159,27 @@ static std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress } } +std::unordered_set reverseResolveWithCache( + CacheBase> & cache, const Poco::Net::IPAddress & address) +{ + auto [result, _ ] = cache.getOrSet(address, [&address]() { return std::make_shared>(reverseResolveImpl(address)); }); + return *result; +} + +Poco::Net::IPAddress pickAddress(const DNSResolver::IPAddresses & addresses) +{ + return addresses.front(); +} + +} + struct DNSResolver::Impl { using HostWithConsecutiveFailures = std::unordered_map; using AddressWithConsecutiveFailures = std::unordered_map; - CachedFn<&resolveIPAddressImpl> cache_host; - CachedFn<&reverseResolveImpl> cache_address; + CacheBase cache_host{100}; + CacheBase> cache_address{100}; std::mutex drop_mutex; std::mutex update_mutex; @@ -180,7 +204,7 @@ DNSResolver::DNSResolver() : impl(std::make_unique()), log(&P Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { - return resolveHostAll(host).front(); + return pickAddress(resolveHostAll(host)); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -189,7 +213,7 @@ DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) return resolveIPAddressImpl(host); addToNewHosts(host); - return impl->cache_host(host); + return resolveIPAddressWithCache(impl->cache_host, host); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_and_port) @@ -202,7 +226,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -211,7 +235,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -224,7 +248,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : impl->cache_host(host); + std::vector ips = impl->disable_cache ? 
hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -240,13 +264,13 @@ std::unordered_set DNSResolver::reverseResolve(const Poco::Net::IPAddres return reverseResolveImpl(address); addToNewAddresses(address); - return impl->cache_address(address); + return reverseResolveWithCache(impl->cache_address, address); } void DNSResolver::dropCache() { - impl->cache_host.drop(); - impl->cache_address.drop(); + impl->cache_host.reset(); + impl->cache_address.reset(); std::scoped_lock lock(impl->update_mutex, impl->drop_mutex); @@ -257,6 +281,11 @@ void DNSResolver::dropCache() impl->host_name.reset(); } +void DNSResolver::removeHostFromCache(const std::string & host) +{ + impl->cache_host.remove(host); +} + void DNSResolver::setDisableCacheFlag(bool is_disabled) { impl->disable_cache = is_disabled; @@ -378,17 +407,20 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - /// Usage of updateHost implies that host is already in cache and there is no extra computations - auto old_value = impl->cache_host(host); - impl->cache_host.update(host); - return old_value != impl->cache_host(host); + const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); + auto new_value = resolveIPAddressImpl(host); + const bool result = old_value != new_value; + impl->cache_host.set(host, std::make_shared(std::move(new_value))); + return result; } bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address) { - auto old_value = impl->cache_address(address); - impl->cache_address.update(address); - return old_value == impl->cache_address(address); + const auto old_value = reverseResolveWithCache(impl->cache_address, address); + auto new_value = reverseResolveImpl(address); + const bool result = old_value != new_value; + impl->cache_address.set(address, std::make_shared>(std::move(new_value))); + return result; } void DNSResolver::addToNewHosts(const String & host) diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 83de616d81a..a05456d3de8 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -18,6 +18,7 @@ class DNSResolver : private boost::noncopyable { public: using IPAddresses = std::vector; + using IPAddressesPtr = std::shared_ptr; static DNSResolver & instance(); @@ -48,6 +49,9 @@ public: /// Drops all caches void dropCache(); + /// Removes an entry from cache or does nothing + void removeHostFromCache(const std::string & host); + /// Updates all known hosts in cache. 
/// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index d5c2188ad01..515060803d6 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB { @@ -226,6 +226,30 @@ String TracingContext::composeTraceparentHeader() const static_cast(trace_flags)); } +void TracingContext::deserialize(ReadBuffer & buf) +{ + buf >> this->trace_id + >> "\n" + >> this->span_id + >> "\n" + >> this->tracestate + >> "\n" + >> this->trace_flags + >> "\n"; +} + +void TracingContext::serialize(WriteBuffer & buf) const +{ + buf << this->trace_id + << "\n" + << this->span_id + << "\n" + << this->tracestate + << "\n" + << this->trace_flags + << "\n"; +} + const TracingContextOnThread & CurrentContext() { return current_thread_trace_context; diff --git a/src/Common/OpenTelemetryTraceContext.h b/src/Common/OpenTelemetryTraceContext.h index 63136f8731d..03bac2891fc 100644 --- a/src/Common/OpenTelemetryTraceContext.h +++ b/src/Common/OpenTelemetryTraceContext.h @@ -7,6 +7,8 @@ namespace DB struct Settings; class OpenTelemetrySpanLog; +class WriteBuffer; +class ReadBuffer; namespace OpenTelemetry { @@ -63,6 +65,9 @@ struct TracingContext { return trace_id != UUID(); } + + void deserialize(ReadBuffer & buf); + void serialize(WriteBuffer & buf) const; }; /// Tracing context kept on each thread @@ -157,5 +162,16 @@ struct SpanHolder : public Span } +inline WriteBuffer & operator<<(WriteBuffer & buf, const OpenTelemetry::TracingContext & context) +{ + context.serialize(buf); + return buf; } +inline ReadBuffer & operator>> (ReadBuffer & buf, OpenTelemetry::TracingContext & context) +{ + context.deserialize(buf); + return buf; +} + +} diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 6162de48143..281a65ca36a 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -32,6 +32,13 @@ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) +/// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array +/// to ensure correct byte order on different endian machines +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define CURRENT_BYTES_IDX(i) (7 - i) +#else +#define CURRENT_BYTES_IDX(i) (i) +#endif class SipHash { @@ -55,7 +62,7 @@ private: ALWAYS_INLINE void finalize() { /// In the last free byte, we write the remainder of the division by 256. 
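An aside on the CURRENT_BYTES_IDX change above: indexing the byte buffer in reverse on big-endian machines makes the assembled 64-bit word identical on both byte orders. A small self-contained illustration (toy values, not part of SipHash itself):

```cpp
#include <cstdint>
#include <cstdio>

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#  define CURRENT_BYTES_IDX(i) (7 - (i))
#else
#  define CURRENT_BYTES_IDX(i) (i)
#endif

int main()
{
    uint64_t current_word = 0;
    auto * current_bytes = reinterpret_cast<unsigned char *>(&current_word);

    // Append three tail bytes the way SipHash::update() does.
    const unsigned char data[3] = {0xAA, 0xBB, 0xCC};
    for (int i = 0; i < 3; ++i)
        current_bytes[CURRENT_BYTES_IDX(i)] = data[i];

    // Prints 0000000000ccbbaa on little- and big-endian machines alike,
    // so the mixing rounds see the same 64-bit value.
    std::printf("%016llx\n", static_cast<unsigned long long>(current_word));
}
```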
- current_bytes[7] = static_cast(cnt); + current_bytes[CURRENT_BYTES_IDX(7)] = static_cast(cnt); v3 ^= current_word; SIPROUND; @@ -92,7 +99,7 @@ public: { while (cnt & 7 && data < end) { - current_bytes[cnt & 7] = *data; + current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data; ++data; ++cnt; } @@ -125,13 +132,13 @@ public: current_word = 0; switch (end - data) { - case 7: current_bytes[6] = data[6]; [[fallthrough]]; - case 6: current_bytes[5] = data[5]; [[fallthrough]]; - case 5: current_bytes[4] = data[4]; [[fallthrough]]; - case 4: current_bytes[3] = data[3]; [[fallthrough]]; - case 3: current_bytes[2] = data[2]; [[fallthrough]]; - case 2: current_bytes[1] = data[1]; [[fallthrough]]; - case 1: current_bytes[0] = data[0]; [[fallthrough]]; + case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; + case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; + case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]]; + case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]]; + case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]]; + case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]]; + case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]]; case 0: break; } } @@ -157,8 +164,8 @@ public: void get128(char * out) { finalize(); - unalignedStoreLE(out, v0 ^ v1); - unalignedStoreLE(out + 8, v2 ^ v3); + unalignedStore(out, v0 ^ v1); + unalignedStore(out + 8, v2 ^ v3); } template @@ -225,3 +232,5 @@ inline UInt64 sipHash64(const std::string & s) { return sipHash64(s.data(), s.size()); } + +#undef CURRENT_BYTES_IDX diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 70f80b62868..37ce3a03cd8 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -4,14 +4,15 @@ #include #include #include -#include #include #include #include #include +#include #include #include +#include #include @@ -462,20 +463,36 @@ std::string StackTrace::toString(void ** frame_pointers_, size_t offset, size_t return toStringStatic(frame_pointers_copy, offset, size); } -static CachedFn<&toStringImpl> & cacheInstance() +using StackTraceRepresentation = std::tuple; +using StackTraceCache = std::map; + +static StackTraceCache & cacheInstance() { - static CachedFn<&toStringImpl> cache; + static StackTraceCache cache; return cache; } +static std::mutex stacktrace_cache_mutex; + std::string StackTrace::toStringStatic(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) { /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. - return cacheInstance()(frame_pointers, offset, size); + /// Note that this cache can grow unconditionally, but practically it should be small. 
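An aside on the StackTrace cache rewrite above: CachedFn is replaced by plain memoization, a mutex-guarded std::map keyed by the full argument tuple. A generic sketch of the same idea (simplified; the real key is the (frame_pointers, offset, size) tuple):

```cpp
#include <map>
#include <mutex>

template <typename Key, typename Value, typename Compute>
Value getOrCompute(std::map<Key, Value> & cache, std::mutex & mutex, const Key & key, Compute && compute)
{
    std::lock_guard lock{mutex};                   // one lock around lookup and insert
    auto it = cache.find(key);
    if (it == cache.end())
        it = cache.emplace(key, compute()).first;  // miss: compute exactly once
    return it->second;                             // returned by value, like toStringStatic()
}
```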
+ std::lock_guard lock{stacktrace_cache_mutex}; + + StackTraceRepresentation key{frame_pointers, offset, size}; + auto & cache = cacheInstance(); + if (cache.contains(key)) + return cache[key]; + + auto result = toStringImpl(frame_pointers, offset, size); + cache[key] = result; + return result; } void StackTrace::dropCache() { - cacheInstance().drop(); + std::lock_guard lock{stacktrace_cache_mutex}; + cacheInstance().clear(); } diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index a5fbe7de078..70b03833775 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -20,8 +20,6 @@ UInt32 getSupportedArchs() result |= static_cast(TargetArch::AVX512BW); if (Cpu::CpuFlagsCache::have_AVX512VBMI) result |= static_cast(TargetArch::AVX512VBMI); - if (Cpu::CpuFlagsCache::have_AVX512VBMI2) - result |= static_cast(TargetArch::AVX512VBMI2); return result; } @@ -40,9 +38,8 @@ String toString(TargetArch arch) case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; case TargetArch::AVX512F: return "avx512f"; - case TargetArch::AVX512BW: return "avx512bw"; - case TargetArch::AVX512VBMI: return "avx512vbmi"; - case TargetArch::AVX512VBMI2: return "avx512vbmi"; + case TargetArch::AVX512BW: return "avx512bw"; + case TargetArch::AVX512VBMI: return "avx512vbmi"; } __builtin_unreachable(); diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index 250642f6ee4..f078c0e3ffc 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -31,7 +31,7 @@ * int funcImpl() { * return 2; * } - * ) // DECLARE_AVX2_SPECIFIC_CODE + * ) // DECLARE_DEFAULT_CODE * * int func() { * #if USE_MULTITARGET_CODE @@ -80,9 +80,8 @@ enum class TargetArch : UInt32 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), - AVX512BW = (1 << 4), - AVX512VBMI = (1 << 5), - AVX512VBMI2 = (1 << 6), + AVX512BW = (1 << 4), + AVX512VBMI = (1 << 5), }; /// Runtime detection. 
@@ -101,7 +100,6 @@ String toString(TargetArch arch); #if defined(__clang__) -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f"))) @@ -110,8 +108,6 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt"))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2\"))),apply_to=function)") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi\"))),apply_to=function)") # define BEGIN_AVX512BW_SPECIFIC_CODE \ @@ -133,7 +129,6 @@ String toString(TargetArch arch); # define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); #else -#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native"))) #define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native"))) #define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) #define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native"))) @@ -142,9 +137,6 @@ String toString(TargetArch arch); #define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) #define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE -# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \ - _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")") # define BEGIN_AVX512VBMI_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")") @@ -225,16 +217,6 @@ namespace TargetSpecific::AVX512VBMI { \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) \ -BEGIN_AVX512VBMI2_SPECIFIC_CODE \ -namespace TargetSpecific::AVX512VBMI2 { \ - DUMMY_FUNCTION_DEFINITION \ - using namespace DB::TargetSpecific::AVX512VBMI2; \ - __VA_ARGS__ \ -} \ -END_TARGET_SPECIFIC_CODE - - #else #define USE_MULTITARGET_CODE 0 @@ -247,7 +229,6 @@ END_TARGET_SPECIFIC_CODE #define DECLARE_AVX512F_SPECIFIC_CODE(...) #define DECLARE_AVX512BW_SPECIFIC_CODE(...) #define DECLARE_AVX512VBMI_SPECIFIC_CODE(...) -#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) 
#endif @@ -264,9 +245,8 @@ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) \ -DECLARE_AVX512BW_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512VBMI_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512VBMI2_SPECIFIC_CODE (__VA_ARGS__) +DECLARE_AVX512BW_SPECIFIC_CODE(__VA_ARGS__) \ +DECLARE_AVX512VBMI_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; /// NOLINT @@ -296,9 +276,6 @@ DECLARE_AVX512VBMI_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX512VBMI; /// NOLINT ) // DECLARE_AVX512VBMI_SPECIFIC_CODE -DECLARE_AVX512VBMI2_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512VBMI2; /// NOLINT -) // DECLARE_AVX512VBMI2_SPECIFIC_CODE /** Runtime Dispatch helpers for class members. * diff --git a/src/Common/tests/gtest_cached_fn.cpp b/src/Common/tests/gtest_cached_fn.cpp deleted file mode 100644 index ab15a1ee5e1..00000000000 --- a/src/Common/tests/gtest_cached_fn.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include -#include -#include -#include - -using namespace std::chrono_literals; - -constexpr int add(int x, int y) -{ - return x + y; -} - -int longFunction(int x, int y) -{ - std::this_thread::sleep_for(1s); - return x + y; -} - -auto f = [](int x, int y) { return x - y; }; - -TEST(CachedFn, Basic) -{ - CachedFn<&add> fn; - - const int res = fn(1, 2); - EXPECT_EQ(fn(1, 2), res); - - /// In GCC, lambda can't be placed in TEST, producing " has no linkage". - /// Assuming http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4268.html, - /// this is a GCC bug. - CachedFn<+f> fn2; - - const int res2 = fn2(1, 2); - EXPECT_EQ(fn2(1, 2), res2); -} - -TEST(CachedFn, CachingResults) -{ - CachedFn<&longFunction> fn; - - for (int x = 0; x < 2; ++x) - { - for (int y = 0; y < 2; ++y) - { - const int res = fn(x, y); - const time_t start = time(nullptr); - - for (int count = 0; count < 1000; ++count) - EXPECT_EQ(fn(x, y), res); - - EXPECT_LT(time(nullptr) - start, 10); - } - } -} diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index 42d7d967b1f..08092cf68f1 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -705,7 +705,7 @@ void KeeperServer::waitInit() int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) - throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); + LOG_WARNING(log, "Failed to wait for RAFT initialization in {}ms, will continue in background", timeout); } std::vector KeeperServer::getDeadSessions() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 9dd87904a56..5dedc6117aa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -42,6 +42,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) */ #define COMMON_SETTINGS(M) \ + M(Dialect, dialect, Dialect::clickhouse, "Which SQL dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for 
reading", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index daa678c0141..2a564ebe6d3 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -158,5 +158,7 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) - +IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, + {{"clickhouse", Dialect::clickhouse}, + {"kusto", Dialect::kusto}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index b5e908defc7..97c4275c4d2 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -183,4 +183,12 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) +enum class Dialect +{ + clickhouse, + kusto, + kusto_auto, +}; + +DECLARE_SETTING_ENUM(Dialect) } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index d449768935a..157255bba12 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -266,8 +266,8 @@ private: { size_t pos = message.find('\n'); - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message.substr(0, pos)); + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. while (pos != std::string_view::npos) @@ -315,14 +315,14 @@ private: if (query_id.empty()) { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (no query) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, + LOG_FATAL(log, "(version {}{}, build id: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, thread_num, query_id, query, strsignal(sig), sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context) } @@ -838,6 +838,7 @@ static void blockSignals(const std::vector & signals) throw Poco::Exception("Cannot block signal."); } +extern String getGitHash(); void BaseDaemon::initializeTerminationAndSignalProcessing() { @@ -870,13 +871,15 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() #if defined(__ELF__) && !defined(OS_FREEBSD) String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); if (build_id_hex.empty()) - build_id_info = "no build id"; + build_id = ""; else - build_id_info = "build id: " + build_id_hex; + build_id = build_id_hex; #else - build_id_info = "no build id"; + build_id = ""; #endif + git_hash = getGitHash(); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -888,8 +891,9 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::logRevision() const { Poco::Logger::root().information("Starting " + std::string{VERSION_FULL} - + " with 
revision " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", " + build_id_info + + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) + + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index 1b67ca986a8..d248ad9cec9 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -172,7 +172,8 @@ protected: DB::ConfigProcessor::LoadedConfig loaded_config; Poco::Util::AbstractConfiguration * last_configuration = nullptr; - String build_id_info; + String build_id; + String git_hash; String stored_binary_hash; std::vector handled_signals; diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 78c133d9893..f02fd69aa79 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,7 +1,10 @@ +configure_file(GitHash.cpp.in GitHash.generated.cpp) + add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp + GitHash.generated.cpp ) if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/src/Daemon/GitHash.cpp.in b/src/Daemon/GitHash.cpp.in new file mode 100644 index 00000000000..4a2da793fc2 --- /dev/null +++ b/src/Daemon/GitHash.cpp.in @@ -0,0 +1,8 @@ +// File was generated by CMake + +#include + +String getGitHash() +{ + return "@GIT_HASH@"; +} diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index f1bf56e2beb..507320fffde 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -642,6 +643,7 @@ BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, Contex entry.query = queryToString(query); entry.initiator = ddl_worker->getCommonHostID(); entry.setSettingsIfRequired(query_context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker->tryEnqueueAndExecuteEntry(entry, query_context); Strings hosts_to_wait = getZooKeeper()->getChildren(zookeeper_path + "/replicas"); diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index a63235b3db0..8c2983e1939 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -221,6 +221,10 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr /// NOTE Possibly it would be better to execute initial query on the most up-to-date node, /// but it requires more complex logic around /try node. 
+ OpenTelemetry::SpanHolder span(__FUNCTION__); + span.addAttribute("clickhouse.cluster", database->getDatabaseName()); + entry.tracing_context = OpenTelemetry::CurrentContext(); + auto zookeeper = getAndSetZooKeeper(); UInt32 our_log_ptr = getLogPointer(); UInt32 max_log_ptr = parse(zookeeper->get(database->zookeeper_path + "/max_log_ptr")); diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index cbfcbdf7b88..2717826f6ac 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -37,7 +37,7 @@ namespace ErrorCodes AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRemoteFS( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_, size_t min_bytes_for_seek_) @@ -111,7 +111,7 @@ std::future AsynchronousReadIndirectBufferFromRemot request.ignore = bytes_to_ignore; bytes_to_ignore = 0; } - return reader->submit(request); + return reader.submit(request); } diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index 879658e239f..cf7feb416b2 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -31,7 +31,7 @@ class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase { public: explicit AsynchronousReadIndirectBufferFromRemoteFS( - AsynchronousReaderPtr reader_, const ReadSettings & settings_, + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_, size_t min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE); @@ -64,7 +64,7 @@ private: std::future asyncReadInto(char * data, size_t size); - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index d2b3bcbaa5e..e8bfd74af65 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -198,31 +198,10 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - ThreadGroupStatusPtr running_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - running_group = CurrentThread::get().getThreadGroup(); + auto schedule = threadPoolCallbackRunner(pool, "ThreadPoolRead"); - ContextPtr query_context; - if (CurrentThread::isInitialized()) - query_context = CurrentThread::get().getQueryContext(); - - auto task = std::make_shared>([request, fd, running_group, query_context] + return schedule([request, fd]() -> Result { - ThreadStatus thread_status; - - SCOPE_EXIT({ - if (running_group) - thread_status.detachQuery(); - }); - - if (running_group) - thread_status.attachQuery(running_group); - - if (query_context) - thread_status.attachQueryContext(query_context); - - setThreadName("ThreadPoolRead"); - Stopwatch watch(CLOCK_MONOTONIC); SCOPE_EXIT({ watch.stop(); @@ -260,14 +239,7 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); return Result{ .size = bytes_read, .offset = request.ignore }; - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
- pool.scheduleOrThrow([task]{ (*task)(); }, -request.priority); - - return future; + }, request.priority); } } diff --git a/src/Disks/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h index 15486a7ac1f..dc754e0a81c 100644 --- a/src/Disks/IO/ThreadPoolReader.h +++ b/src/Disks/IO/ThreadPoolReader.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -31,8 +32,11 @@ private: public: ThreadPoolReader(size_t pool_size, size_t queue_size_); + std::future submit(Request request) override; + void wait() override { pool.wait(); } + /// pool automatically waits for all tasks in destructor. }; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 8e2551dceb0..561acc00f6f 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -40,33 +39,10 @@ ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queu std::future ThreadPoolRemoteFSReader::submit(Request request) { - ThreadGroupStatusPtr running_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - running_group = CurrentThread::get().getThreadGroup(); + auto schedule = threadPoolCallbackRunner(pool, "VFSRead"); - ContextPtr query_context; - if (CurrentThread::isInitialized()) - query_context = CurrentThread::get().getQueryContext(); - - auto task = std::make_shared>([request, running_group, query_context] + return schedule([request]() -> Result { - ThreadStatus thread_status; - - SCOPE_EXIT({ - if (running_group) - thread_status.detachQuery(); - }); - - /// To be able to pass ProfileEvents. - if (running_group) - thread_status.attachQuery(running_group); - - /// Save query context if any, because cache implementation needs it. - if (query_context) - thread_status.attachQueryContext(query_context); - - setThreadName("VFSRead"); - CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); @@ -80,14 +56,7 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, result.offset ? result.size - result.offset : result.size); return Result{ .size = result.size, .offset = result.offset }; - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
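An aside on the two reader changes above: the hand-rolled packaged_task plumbing, including the priority sign flip noted in the removed comment, is exactly what threadPoolCallbackRunner factors out. A simplified sketch of such a runner (assumed shape, not the exact ClickHouse signature):

```cpp
#include <functional>
#include <future>
#include <memory>

template <typename Result, typename Pool>
auto makeCallbackRunner(Pool & pool)
{
    return [&pool](std::function<Result()> callback, int priority) -> std::future<Result>
    {
        auto task = std::make_shared<std::packaged_task<Result()>>(std::move(callback));
        auto future = task->get_future();
        // ThreadPool treats bigger as higher priority, hence the negation.
        pool.scheduleOrThrow([task] { (*task)(); }, -priority);
        return future;
    };
}
```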
- pool.scheduleOrThrow([task]{ (*task)(); }, -request.priority); - - return future; + }, request.priority); } } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index 66e300697b8..f3777c6638d 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -14,6 +15,8 @@ public: std::future submit(Request request) override; + void wait() override { pool.wait(); } + private: ThreadPool pool; }; diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 8130d742ee5..5c4debd56b6 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -11,19 +11,20 @@ namespace DB { +static constexpr auto DEFAULT_RETRY_NUM = 3; + WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( std::shared_ptr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, size_t buf_size_, - const WriteSettings & write_settings_, - std::optional> attributes_) + const WriteSettings & write_settings_) : BufferWithOwnMemory(buf_size_, nullptr, 0) - , blob_container_client(blob_container_client_) + , log(&Poco::Logger::get("WriteBufferFromAzureBlobStorage")) , max_single_part_upload_size(max_single_part_upload_size_) , blob_path(blob_path_) , write_settings(write_settings_) - , attributes(attributes_) + , blob_container_client(blob_container_client_) { } @@ -33,63 +34,69 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() finalize(); } -void WriteBufferFromAzureBlobStorage::finalizeImpl() +void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries) { - if (attributes.has_value()) + auto handle_exception = [&](const auto & e, size_t i) { - auto blob_client = blob_container_client->GetBlobClient(blob_path); - Azure::Storage::Metadata metadata; - for (const auto & [key, value] : *attributes) - metadata[key] = value; - blob_client.SetMetadata(metadata); - } + if (i == num_tries - 1) + throw; - const size_t max_tries = 3; - for (size_t i = 0; i < max_tries; ++i) + LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {}", i + 1, blob_path, e.Message); + }; + + for (size_t i = 0; i < num_tries; ++i) { try { - next(); + func(); break; } + catch (const Azure::Core::Http::TransportException & e) + { + handle_exception(e, i); + } catch (const Azure::Core::RequestFailedException & e) { - if (i == max_tries - 1) - throw; - LOG_INFO(&Poco::Logger::get("WriteBufferFromAzureBlobStorage"), - "Exception caught during finalizing azure storage write at attempt {}: {}", i + 1, e.Message); + handle_exception(e, i); } } } +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + execWithRetry([this](){ next(); }, DEFAULT_RETRY_NUM); +} + void WriteBufferFromAzureBlobStorage::nextImpl() { if (!offset()) return; - auto * buffer_begin = working_buffer.begin(); - auto len = offset(); + char * buffer_begin = working_buffer.begin(); + size_t total_size = offset(); + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); - size_t read = 0; + size_t current_size = 0; std::vector block_ids; - while (read < len) + + while (current_size < total_size) { - auto part_len = std::min(len - read, max_single_part_upload_size); + size_t part_len = std::min(total_size - current_size, max_single_part_upload_size); + const std::string & block_id = 
block_ids.emplace_back(getRandomASCIIString(64)); - auto block_id = getRandomASCIIString(64); - block_ids.push_back(block_id); + Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + current_size), part_len); + execWithRetry([&](){ block_blob_client.StageBlock(block_id, tmp_buffer); }, DEFAULT_RETRY_NUM); - Azure::Core::IO::MemoryBodyStream tmp_buffer(reinterpret_cast(buffer_begin + read), part_len); - block_blob_client.StageBlock(block_id, tmp_buffer); - - read += part_len; + current_size += part_len; + LOG_TRACE(log, "Staged block (id: {}) of size {} (written {}/{}, blob path: {}).", block_id, part_len, current_size, total_size, blob_path); } - block_blob_client.CommitBlockList(block_ids); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, DEFAULT_RETRY_NUM); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); if (write_settings.remote_throttler) - write_settings.remote_throttler->add(read); + write_settings.remote_throttler->add(total_size); } } diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 0005705e68c..8bfd23a6379 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -13,20 +13,25 @@ #include +namespace Poco +{ +class Logger; +} + namespace DB { class WriteBufferFromAzureBlobStorage : public BufferWithOwnMemory { public: + using AzureClientPtr = std::shared_ptr; WriteBufferFromAzureBlobStorage( - std::shared_ptr blob_container_client_, + AzureClientPtr blob_container_client_, const String & blob_path_, size_t max_single_part_upload_size_, size_t buf_size_, - const WriteSettings & write_settings_, - std::optional> attributes_ = {}); + const WriteSettings & write_settings_); ~WriteBufferFromAzureBlobStorage() override; @@ -34,12 +39,15 @@ public: private: void finalizeImpl() override; + void execWithRetry(std::function func, size_t num_tries); - std::shared_ptr blob_container_client; - size_t max_single_part_upload_size; - const String blob_path; - WriteSettings write_settings; - std::optional> attributes; + Poco::Logger * log; + + const size_t max_single_part_upload_size; + const std::string blob_path; + const WriteSettings write_settings; + + AzureClientPtr blob_container_client; }; } diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index d87144dee55..98da89f81ed 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -77,13 +77,21 @@ std::unique_ptr createReadBufferFromFileBase( } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { - static AsynchronousReaderPtr reader = std::make_shared(); + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + + auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { - static AsynchronousReaderPtr reader = std::make_shared(16, 1000000); + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + + auto & reader = 
context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); res = std::make_unique( reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 09e5c3d32dc..b3dcfdafa9e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -107,7 +107,7 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL if (disk_read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = getThreadPoolReader(); + auto & reader = getThreadPoolReader(); return std::make_unique(reader, disk_read_settings, std::move(reader_impl)); } else diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 65720ec3937..9d6610ee326 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -11,22 +11,25 @@ namespace DB namespace ErrorCodes { extern const int NOT_IMPLEMENTED; + extern const int LOGICAL_ERROR; } -AsynchronousReaderPtr IObjectStorage::getThreadPoolReader() +IAsynchronousReader & IObjectStorage::getThreadPoolReader() { - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; - static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); - return reader; + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + + return context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); } ThreadPool & IObjectStorage::getThreadPoolWriter() { - constexpr size_t pool_size = 100; - constexpr size_t queue_size = 1000000; - static ThreadPool writer(pool_size, pool_size, queue_size); - return writer; + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + + return context->getThreadPoolWriter(); } void IObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index dc6683cfe95..52e1a2cb270 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -130,7 +130,7 @@ public: /// Path to directory with objects cache virtual const std::string & getCacheBasePath() const; - static AsynchronousReaderPtr getThreadPoolReader(); + static IAsynchronousReader & getThreadPoolReader(); static ThreadPool & getThreadPoolWriter(); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index c2131a51b74..213f744d84f 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -190,7 +190,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = getThreadPoolReader(); + auto & reader = getThreadPoolReader(); return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); } else @@ -230,6 +230,8 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files"); auto settings_ptr = s3_settings.get(); + auto scheduler = 
threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + auto s3_buffer = std::make_unique( client.get(), bucket, @@ -237,7 +239,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN settings_ptr->s3_settings, attributes, buf_size, - threadPoolCallbackRunner(getThreadPoolWriter()), + std::move(scheduler), disk_write_settings); return std::make_unique( diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 74bae2a09a4..71bde110fa6 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -168,7 +168,7 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { - auto reader = IObjectStorage::getThreadPoolReader(); + auto & reader = IObjectStorage::getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek); } else diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index f22001cdddf..8310d80b461 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -24,7 +24,7 @@ namespace ErrorCodes AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size, @@ -32,7 +32,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( char * existing_memory, size_t alignment, std::optional file_size_) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_) , file_name(file_name_) { ProfileEvents::increment(ProfileEvents::FileOpen); @@ -58,7 +58,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int & fd_, const std::string & original_file_name, @@ -66,7 +66,7 @@ AsynchronousReadBufferFromFile::AsynchronousReadBufferFromFile( char * existing_memory, size_t alignment, std::optional file_size_) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, fd_, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, fd_, buf_size, existing_memory, alignment, file_size_) , file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name) { fd_ = -1; @@ -105,4 +105,3 @@ AsynchronousReadBufferFromFileWithDescriptorsCache::~AsynchronousReadBufferFromF } - diff --git a/src/IO/AsynchronousReadBufferFromFile.h b/src/IO/AsynchronousReadBufferFromFile.h index 96834350bab..1b7eeec4f19 100644 --- a/src/IO/AsynchronousReadBufferFromFile.h +++ b/src/IO/AsynchronousReadBufferFromFile.h @@ -14,7 +14,7 @@ protected: public: explicit AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, @@ -25,7 +25,7 @@ public: /// Use pre-opened file descriptor. 
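+ /// Note: the reader is now taken by reference; it is owned by the global Context (see Context::getThreadPoolReader), so buffers no longer share ownership of it.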
explicit AsynchronousReadBufferFromFile( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. const std::string & original_file_name = {}, @@ -45,7 +45,6 @@ public: } }; - /** Similar to AsynchronousReadBufferFromFile but also transparently shares open file descriptors. */ class AsynchronousReadBufferFromFileWithDescriptorsCache : public AsynchronousReadBufferFromFileDescriptor @@ -56,7 +55,7 @@ private: public: AsynchronousReadBufferFromFileWithDescriptorsCache( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, @@ -64,7 +63,7 @@ public: char * existing_memory = nullptr, size_t alignment = 0, std::optional file_size_ = std::nullopt) - : AsynchronousReadBufferFromFileDescriptor(std::move(reader_), priority_, -1, buf_size, existing_memory, alignment, file_size_) + : AsynchronousReadBufferFromFileDescriptor(reader_, priority_, -1, buf_size, existing_memory, alignment, file_size_) , file_name(file_name_) { file = OpenedFileCache::instance().get(file_name, flags); @@ -80,4 +79,3 @@ public: }; } - diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index 1bf889540eb..c7e2f9b0c41 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -54,7 +54,7 @@ std::future AsynchronousReadBufferFromFileDescripto return std::async(std::launch::deferred, [] { return IAsynchronousReader::Result{.size = 0, .offset = 0}; }); } - return reader->submit(request); + return reader.submit(request); } @@ -140,7 +140,7 @@ void AsynchronousReadBufferFromFileDescriptor::finalize() AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescriptor( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int fd_, size_t buf_size, @@ -148,7 +148,7 @@ AsynchronousReadBufferFromFileDescriptor::AsynchronousReadBufferFromFileDescript size_t alignment, std::optional file_size_) : ReadBufferFromFileBase(buf_size, existing_memory, alignment, file_size_) - , reader(std::move(reader_)) + , reader(reader_) , priority(priority_) , required_alignment(alignment) , fd(fd_) diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index 7ba842997f4..0659bb203a8 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -16,7 +16,7 @@ namespace DB class AsynchronousReadBufferFromFileDescriptor : public ReadBufferFromFileBase { protected: - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; Memory<> prefetch_buffer; @@ -36,7 +36,7 @@ protected: public: AsynchronousReadBufferFromFileDescriptor( - AsynchronousReaderPtr reader_, + IAsynchronousReader & reader_, Int32 priority_, int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index 4583f594c37..ea103a87460 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -18,7 +19,7 @@ namespace DB * For example, this interface may not suffice if you want to serve 10 000 000 of 4 KiB requests per second. * This interface is fairly limited. 
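+ * The new wait() method below lets the owner (the global Context) drain all in-flight requests before a reader is destroyed.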
*/ -class IAsynchronousReader +class IAsynchronousReader : private boost::noncopyable { public: /// For local filesystems, the file descriptor is simply integer /// The method can be called concurrently from multiple threads. virtual std::future submit(Request request) = 0; + virtual void wait() = 0; + /// Destructor must wait for all not completed request and ignore the results. /// It may also cancel the requests. virtual ~IAsynchronousReader() = default; diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index e7bb3dc72a8..d055a42fcb6 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -43,7 +43,7 @@ struct ParallelReadBuffer::ReadWorker }; ParallelReadBuffer::ParallelReadBuffer( - std::unique_ptr reader_factory_, CallbackRunner schedule_, size_t max_working_readers_) + std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_) : SeekableReadBuffer(nullptr, 0) , max_working_readers(max_working_readers_) , schedule(std::move(schedule_)) @@ -71,7 +71,7 @@ bool ParallelReadBuffer::addReaderToPool() auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); ++active_working_reader; - schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }); + schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); }, 0); return true; } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index 45b98f8c977..d6e9b7989ad 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -33,12 +33,13 @@ public: class ReadBufferFactory : public WithFileSize { public: + ~ReadBufferFactory() override = default; + virtual SeekableReadBufferPtr getReader() = 0; - virtual ~ReadBufferFactory() override = default; virtual off_t seek(off_t off, int whence) = 0; }; - explicit ParallelReadBuffer(std::unique_ptr reader_factory_, CallbackRunner schedule_, size_t max_working_readers); + ParallelReadBuffer(std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers); ~ParallelReadBuffer() override { finishAndWait(); } @@ -75,7 +76,7 @@ private: size_t max_working_readers; std::atomic_size_t active_working_reader{0}; - CallbackRunner schedule; + ThreadPoolCallbackRunner schedule; std::unique_ptr reader_factory; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 9fe10aecda5..30373816eca 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,3 +1,4 @@ +#include "Common/DNSResolver.h" #include #if USE_AWS_S3 @@ -257,6 +258,9 @@ void PocoHTTPClient::makeRequestInternal( if (!request_configuration.proxy_host.empty()) { + if (enable_s3_requests_logging) + LOG_TEST(log, "Due to reverse proxy, host name ({}) won't be resolved on ClickHouse side", uri); + /// Reverse proxy can replace host header with resolved ip address instead of host name. /// This can lead to request signature difference on S3 side.
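+ /// For that reason resolve_host is set to false below, keeping the original host name in the request.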
session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); @@ -443,6 +447,10 @@ void PocoHTTPClient::makeRequestInternal( response->SetClientErrorMessage(getCurrentExceptionMessage(false)); addMetric(request, S3MetricType::Errors); + + /// Most likely this is a socket timeout or some other issue related to DNS. + /// Remove this host from the DNS cache, just to be safe. + DNSResolver::instance().removeHostFromCache(Poco::URI(uri).getHost()); } } diff --git a/src/IO/SynchronousReader.h b/src/IO/SynchronousReader.h index 7b5113a4487..238d6e9371e 100644 --- a/src/IO/SynchronousReader.h +++ b/src/IO/SynchronousReader.h @@ -13,7 +13,8 @@ class SynchronousReader final : public IAsynchronousReader { public: std::future submit(Request request) override; + + void wait() override {} }; } - diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 0215376aa0e..3b9b042e2af 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -74,7 +74,7 @@ WriteBufferFromS3::WriteBufferFromS3( const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_, size_t buffer_size_, - ScheduleFunc schedule_, + ThreadPoolCallbackRunner schedule_, const WriteSettings & write_settings_) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) @@ -292,7 +292,7 @@ void WriteBufferFromS3::writePart() } task_finish_notify(); - }); + }, 0); } catch (...) { @@ -442,7 +442,7 @@ void WriteBufferFromS3::makeSinglepartUpload() } task_notify_finish(); - }); + }, 0); } catch (...) { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index ae03299ffbd..b655fe1d14b 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -33,7 +34,6 @@ namespace Aws::S3::Model namespace DB { -using ScheduleFunc = std::function)>; class WriteBufferFromFile; /** @@ -53,7 +53,7 @@ public: const S3Settings::ReadWriteSettings & s3_settings_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - ScheduleFunc schedule_ = {}, + ThreadPoolCallbackRunner schedule_ = {}, const WriteSettings & write_settings_ = {}); ~WriteBufferFromS3() override; @@ -106,7 +106,7 @@ private: /// Following fields are for background uploads in thread pool (if specified). /// We use std::function to avoid dependency of Interpreters - const ScheduleFunc schedule; + const ThreadPoolCallbackRunner schedule; std::unique_ptr put_object_task; /// Does not need protection by mutex because of the logic around is_finished field.
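+ /// The multipart upload task list below, in contrast, is appended by the writing thread while pool workers complete entries, so it is guarded by bg_tasks_mutex (checked statically via TSA_GUARDED_BY).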
std::list TSA_GUARDED_BY(bg_tasks_mutex) upload_object_tasks; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be47338541c..321a46baff6 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -31,6 +31,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -228,6 +231,12 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) mutable std::unique_ptr message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) + mutable std::unique_ptr asynchronous_remote_fs_reader; + mutable std::unique_ptr asynchronous_local_fs_reader; + mutable std::unique_ptr synchronous_local_fs_reader; + + mutable std::unique_ptr threadpool_writer; + mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends mutable ThrottlerPtr remote_read_throttler; /// A server-wide throttler for remote IO reads @@ -311,23 +320,76 @@ struct ContextSharedPart : boost::noncopyable ~ContextSharedPart() { - try + /// Wait for thread pool for background reads and writes, + /// since it may use per-user MemoryTracker which will be destroyed here. + if (asynchronous_remote_fs_reader) { - /// Wait for thread pool for background writes, - /// since it may use per-user MemoryTracker which will be destroyed here. - IObjectStorage::getThreadPoolWriter().wait(); - /// Make sure that threadpool is destructed before this->process_list - /// because thread_status, which was created for threads inside threadpool, - /// relies on it. - if (load_marks_threadpool) + try { + LOG_DEBUG(log, "Destructing remote fs threadpool reader"); + asynchronous_remote_fs_reader->wait(); + asynchronous_remote_fs_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (asynchronous_local_fs_reader) + { + try + { + LOG_DEBUG(log, "Destructing local fs threadpool reader"); + asynchronous_local_fs_reader->wait(); + asynchronous_local_fs_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (synchronous_local_fs_reader) + { + try + { + LOG_DEBUG(log, "Destructing synchronous local fs threadpool reader"); + synchronous_local_fs_reader->wait(); + synchronous_local_fs_reader.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (threadpool_writer) + { + try + { + LOG_DEBUG(log, "Destructing threadpool writer"); + threadpool_writer->wait(); + threadpool_writer.reset(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + if (load_marks_threadpool) + { + try + { + LOG_DEBUG(log, "Destructing marks loader"); load_marks_threadpool->wait(); load_marks_threadpool.reset(); } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + catch (...)
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } try @@ -3378,6 +3440,66 @@ OrdinaryBackgroundExecutorPtr Context::getCommonExecutor() const return shared->common_executor; } +IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + + switch (type) + { + case FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER: + { + if (!shared->asynchronous_remote_fs_reader) + { + auto pool_size = config.getUInt(".threadpool_remote_fs_reader_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_remote_fs_reader_queue_size", 1000000); + + shared->asynchronous_remote_fs_reader = std::make_unique(pool_size, queue_size); + } + + return *shared->asynchronous_remote_fs_reader; + } + case FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER: + { + if (!shared->asynchronous_local_fs_reader) + { + auto pool_size = config.getUInt(".threadpool_local_fs_reader_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_local_fs_reader_queue_size", 1000000); + + shared->asynchronous_local_fs_reader = std::make_unique(pool_size, queue_size); + } + + return *shared->asynchronous_local_fs_reader; + } + case FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER: + { + if (!shared->synchronous_local_fs_reader) + { + shared->synchronous_local_fs_reader = std::make_unique(); + } + + return *shared->synchronous_local_fs_reader; + } + } +} + +ThreadPool & Context::getThreadPoolWriter() const +{ + const auto & config = getConfigRef(); + + auto lock = getLock(); + + if (!shared->threadpool_writer) + { + auto pool_size = config.getUInt(".threadpool_writer_pool_size", 100); + auto queue_size = config.getUInt(".threadpool_writer_queue_size", 1000000); + + shared->threadpool_writer = std::make_unique(pool_size, pool_size, queue_size); + } + + return *shared->threadpool_writer; +} ReadSettings Context::getReadSettings() const { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b825c71a81e..a9984e32c1b 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1011,6 +1011,17 @@ public: OrdinaryBackgroundExecutorPtr getFetchesExecutor() const; OrdinaryBackgroundExecutorPtr getCommonExecutor() const; + enum class FilesystemReaderType + { + SYNCHRONOUS_LOCAL_FS_READER, + ASYNCHRONOUS_LOCAL_FS_READER, + ASYNCHRONOUS_REMOTE_FS_READER, + }; + + IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; + + ThreadPool & getThreadPoolWriter() const; + /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 50876ed29af..2d609c00406 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -50,21 +50,26 @@ bool HostID::isLocalAddress(UInt16 clickhouse_port) const void DDLLogEntry::assertVersion() const { - constexpr UInt64 max_version = 2; - if (version == 0 || max_version < version) + if (version == 0 + /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not change the entry format, it uses version 2, so there shouldn't be entries with this version + || version == NORMALIZE_CREATE_ON_INITIATOR_VERSION + || version > DDL_ENTRY_FORMAT_MAX_VERSION) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown DDLLogEntry format version: {}."
- "Maximum supported version is {}", version, max_version); + "Maximum supported version is {}", version, DDL_ENTRY_FORMAT_MAX_VERSION); } void DDLLogEntry::setSettingsIfRequired(ContextPtr context) { version = context->getSettingsRef().distributed_ddl_entry_format_version; + if (version <= 0 || version > DDL_ENTRY_FORMAT_MAX_VERSION) + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown distributed_ddl_entry_format_version: {}." + "Maximum supported version is {}.", version, DDL_ENTRY_FORMAT_MAX_VERSION); /// NORMALIZE_CREATE_ON_INITIATOR_VERSION does not affect entry format in ZooKeeper if (version == NORMALIZE_CREATE_ON_INITIATOR_VERSION) version = SETTINGS_IN_ZK_VERSION; - if (version == SETTINGS_IN_ZK_VERSION) + if (version >= SETTINGS_IN_ZK_VERSION) settings.emplace(context->getSettingsRef().changes()); } @@ -94,6 +99,9 @@ String DDLLogEntry::toString() const wb << "settings: " << serializeAST(ast) << "\n"; } + if (version >= OPENTELEMETRY_ENABLED_VERSION) + wb << "tracing: " << this->tracing_context; + return wb.str(); } @@ -106,7 +114,7 @@ void DDLLogEntry::parse(const String & data) Strings host_id_strings; rb >> "query: " >> escape >> query >> "\n"; - if (version == 1) + if (version == OLDEST_VERSION) { rb >> "hosts: " >> host_id_strings >> "\n"; @@ -115,9 +123,8 @@ void DDLLogEntry::parse(const String & data) else initiator.clear(); } - else if (version == 2) + else if (version >= SETTINGS_IN_ZK_VERSION) { - if (!rb.eof() && *rb.position() == 'h') rb >> "hosts: " >> host_id_strings >> "\n"; if (!rb.eof() && *rb.position() == 'i') @@ -134,6 +141,12 @@ void DDLLogEntry::parse(const String & data) } } + if (version >= OPENTELEMETRY_ENABLED_VERSION) + { + if (!rb.eof() && *rb.position() == 't') + rb >> "tracing: " >> this->tracing_context; + } + assertEOF(rb); if (!host_id_strings.empty()) diff --git a/src/Interpreters/DDLTask.h b/src/Interpreters/DDLTask.h index d5990edd43f..661cee84a45 100644 --- a/src/Interpreters/DDLTask.h +++ b/src/Interpreters/DDLTask.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -69,12 +70,18 @@ struct DDLLogEntry static constexpr const UInt64 OLDEST_VERSION = 1; static constexpr const UInt64 SETTINGS_IN_ZK_VERSION = 2; static constexpr const UInt64 NORMALIZE_CREATE_ON_INITIATOR_VERSION = 3; + static constexpr const UInt64 OPENTELEMETRY_ENABLED_VERSION = 4; + /// Add new version here + + /// Remember to update the value below once new version is added + static constexpr const UInt64 DDL_ENTRY_FORMAT_MAX_VERSION = 4; UInt64 version = 1; String query; std::vector hosts; String initiator; // optional std::optional settings; + OpenTelemetry::TracingContext tracing_context; void setSettingsIfRequired(ContextPtr context); String toString() const; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index c8878297c02..8873d851de1 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -515,6 +516,11 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) LOG_DEBUG(log, "Processing task {} ({})", task.entry_name, task.entry.query); chassert(!task.completely_processed); + /// Setup tracing context on current thread for current DDL + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + task.entry.tracing_context, + this->context->getOpenTelemetrySpanLog()); + String active_node_path = task.getActiveNodePath(); String finished_node_path = 
task.getFinishedNodePath(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 622f2a71ec9..41c378babcd 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -766,6 +766,16 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN); } + /// Check that the _row_exists column, which is reserved for lightweight delete, is not declared explicitly for MergeTree-family tables. + if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree")) + { + auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); + if (search != all_columns.end()) + throw Exception("Cannot create table with column '" + LightweightDeleteDescription::FILTER_COLUMN.name + "' " + "for *MergeTree engines because it is reserved for the lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN); + } + const auto & settings = getContext()->getSettingsRef(); /// Check low cardinality types in creating table if it was not allowed in setting diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index b1f5749da25..9a4152415af 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -350,7 +350,10 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) /// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory /// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads()) - thread_group->getProfileEventsCountersAndMemoryForThreads(); + { + std::lock_guard guard(thread_group->mutex); + auto stats = std::move(thread_group->finished_threads_counters_memory); + } thread_group.reset(); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 25e1dce4f9f..454474dde2b 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -55,6 +55,8 @@ bool isSupportedAlterType(int type) BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, const DDLQueryOnClusterParams & params) { + OpenTelemetry::SpanHolder span(__FUNCTION__); + if (context->getCurrentTransaction() && context->getSettingsRef().throw_on_unsupported_query_inside_transaction) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ON CLUSTER queries inside transactions are not supported"); @@ -88,6 +90,8 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, cluster = context->getCluster(query->cluster); } + span.addAttribute("clickhouse.cluster", query->cluster); + /// TODO: support per-cluster grant context->checkAccess(AccessType::CLUSTER); @@ -164,6 +168,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, entry.query = queryToString(query_ptr); entry.initiator = ddl_worker.getCommonHostID(); entry.setSettingsIfRequired(context); + entry.tracing_context = OpenTelemetry::CurrentContext(); String node_path = ddl_worker.enqueueQuery(entry); return getDistributedDDLStatus(node_path, entry, context); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index abecb24852b..7a633242904 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -73,6 +73,7 @@ #include #include +#include namespace ProfileEvents { @@ -392,10 +393,20 @@
static std::tuple executeQueryImpl( String query_table; try { - ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + if (settings.dialect == Dialect::kusto && !internal) + { + ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); - /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } + else + { + ParserQuery parser(end, settings.allow_settings_after_format_in_insert); + + /// TODO: parser should fail early when max_query_size limit is reached. + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + } if (auto txn = context->getCurrentTransaction()) { diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp index adab79d06cb..9bf32e4f2e1 100644 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ b/src/Interpreters/threadPoolCallbackRunner.cpp @@ -1,40 +1,44 @@ #include "threadPoolCallbackRunner.h" #include - #include +#include +#include +#include + namespace DB { -CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) { - return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) mutable + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future { - pool->scheduleOrThrow( - [&, callback = std::move(callback), thread_group]() - { + auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - - /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. - /// Typically, it may be changes from Process to User. - /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. - /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. - /// When, finally, we destroy the thread (and the ThreadStatus), - /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory, - /// and by this time user-level memory tracker may be already destroyed. - /// - /// As a work-around, reset memory tracker to total, which is always alive. - CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); - }); - callback(); + CurrentThread::detachQueryIfNotDetached(); }); + + setThreadName(thread_name.data()); + + return callback(); + }); + + auto future = task->get_future(); + + /// ThreadPool is using "bigger is higher priority", while request priorities use "smaller is higher priority".
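+ /// Hence the negation below: a callback submitted with priority 0 is scheduled ahead of one submitted with priority 1.
+ ///
+ /// Usage sketch (hypothetical pool and thread name; the void instantiation is declared below):
+ ///     auto runner = threadPoolCallbackRunner<void>(pool, "AsyncWrite");
+ ///     std::future<void> done = runner([] { /* do the write */ }, /* priority */ 0);
+ ///     done.get();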
+ pool->scheduleOrThrow([task]{ (*task)(); }, -priority); + + return future; }; } +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); +template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); + } diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index 59d06f2f1bc..c146ac67482 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -1,15 +1,18 @@ #pragma once #include +#include namespace DB { -/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously -using CallbackRunner = std::function)>; +/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. +template +using ThreadPoolCallbackRunner = std::function(std::function &&, size_t priority)>; -/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' -CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); +/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. +template +ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); } diff --git a/src/Parsers/CMakeLists.txt b/src/Parsers/CMakeLists.txt index 73f300fd5f6..73d46593e04 100644 --- a/src/Parsers/CMakeLists.txt +++ b/src/Parsers/CMakeLists.txt @@ -3,6 +3,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_parsers .) add_headers_and_sources(clickhouse_parsers ./Access) add_headers_and_sources(clickhouse_parsers ./MySQL) +add_headers_and_sources(clickhouse_parsers ./Kusto) add_library(clickhouse_parsers ${clickhouse_parsers_headers} ${clickhouse_parsers_sources}) target_link_libraries(clickhouse_parsers PUBLIC clickhouse_common_io clickhouse_common_access string_utils) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1de9adb834e..81d6f34aced 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -53,89 +53,6 @@ namespace ErrorCodes } -bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr contents_node; - ParserExpressionList contents(false); - - if (pos->type != TokenType::OpeningSquareBracket) - return false; - ++pos; - - if (!contents.parse(pos, contents_node, expected)) - return false; - - if (pos->type != TokenType::ClosingSquareBracket) - return false; - ++pos; - - auto function_node = std::make_shared(); - function_node->name = "array"; - function_node->arguments = contents_node; - function_node->children.push_back(contents_node); - node = function_node; - - return true; -} - - -bool ParserParenthesisExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr contents_node; - ParserExpressionList contents(false); - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - if (!contents.parse(pos, contents_node, expected)) - return false; - - bool is_elem = true; - if (pos->type == TokenType::Comma) - { - is_elem = false; - ++pos; - } - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - const auto & expr_list = contents_node->as(); - - /// Empty expression in parentheses is not allowed. 
- if (expr_list.children.empty()) - { - expected.add(pos, "non-empty parenthesized list of expressions"); - return false; - } - - /// Special case for one-element tuple. - if (expr_list.children.size() == 1 && is_elem) - { - auto * ast_literal = expr_list.children.front()->as(); - /// But only if its argument is not tuple, - /// since otherwise it will do incorrect transformation: - /// - /// (foo,bar) IN (('foo','bar')) -> (foo,bar) IN ('foo','bar') - if (!(ast_literal && ast_literal->value.getType() == Field::Types::Tuple)) - { - node = expr_list.children.front(); - return true; - } - } - - auto function_node = std::make_shared(); - function_node->name = "tuple"; - function_node->arguments = contents_node; - function_node->children.push_back(contents_node); - node = function_node; - - return true; -} - - bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr select_node; @@ -282,850 +199,10 @@ ASTPtr createFunctionCast(const ASTPtr & expr_ast, const ASTPtr & type_ast) { /// Convert to canonical representation in functional form: CAST(expr, 'type') auto type_literal = std::make_shared(queryToString(type_ast)); - - auto expr_list_args = std::make_shared(); - expr_list_args->children.push_back(expr_ast); - expr_list_args->children.push_back(std::move(type_literal)); - - auto func_node = std::make_shared(); - func_node->name = "CAST"; - func_node->arguments = std::move(expr_list_args); - func_node->children.push_back(func_node->arguments); - - return func_node; + return makeASTFunction("CAST", expr_ast, std::move(type_literal)); } -namespace -{ - bool parseCastAs(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /** Possible variants for cast operator cast(expr [[AS] alias_1] AS Type), cast(expr [[AS] alias_1], type_expr [[as] alias_2]). - * First try to match with cast(expr [[AS] alias_1] AS Type) - * Then try to match with cast(expr [[AS] alias_1], type_expr [[as] alias_2]). 
- */ - - ASTPtr expr_node; - ASTPtr type_node; - ASTPtr identifier_node; - - if (ParserExpression().parse(pos, expr_node, expected)) - { - ParserKeyword as_keyword_parser("AS"); - bool parse_as = as_keyword_parser.ignore(pos, expected); - - /// CAST (a b AS UInt32) OR CAST (a b, expr) - - if (!parse_as && ParserIdentifier().parse(pos, identifier_node, expected)) - { - expr_node->setAlias(getIdentifierName(identifier_node)); - parse_as = as_keyword_parser.ignore(pos, expected); - } - - if (parse_as) - { - /// CAST (a AS Type) OR CAST (a AS b AS Type) OR CAST (a AS b, expr) - - auto begin = pos; - auto expected_copy = expected; - bool next_identifier = ParserIdentifier().ignore(begin, expected_copy); - bool next_identifier_with_comma = next_identifier && ParserToken(TokenType::Comma).ignore(begin, expected_copy); - bool next_identifier_with_as - = next_identifier && !next_identifier_with_comma && as_keyword_parser.ignore(begin, expected_copy); - - if (next_identifier_with_as) - { - if (ParserIdentifier().parse(pos, identifier_node, expected) && as_keyword_parser.ignore(pos, expected)) - expr_node->setAlias(getIdentifierName(identifier_node)); - else - return false; - } - - if (!next_identifier_with_comma && ParserDataType().parse(pos, type_node, expected)) - { - node = createFunctionCast(expr_node, type_node); - return true; - } - } - - /// CAST(a AS b, expr) - - if (parse_as) - { - if (ParserIdentifier().parse(pos, identifier_node, expected)) - expr_node->setAlias(getIdentifierName(identifier_node)); - else - return false; - } - - if (ParserToken(TokenType::Comma).ignore(pos, expected) - && ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, type_node, expected)) - { - node = makeASTFunction("CAST", expr_node, type_node); - return true; - } - } - - return false; - } - - bool parseSubstring(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// Either SUBSTRING(expr FROM start) or SUBSTRING(expr FROM start FOR length) or SUBSTRING(expr, start, length) - /// The latter will be parsed normally as a function later. 
- - ParserKeyword as_keyword_parser("AS"); - ParserIdentifier identifier_parser; - - ASTPtr expr_node; - ASTPtr start_node; - ASTPtr length_node; - - if (!ParserExpression().parse(pos, expr_node, expected)) - return false; - - auto from_keyword_parser = ParserKeyword("FROM"); - bool from_exists = from_keyword_parser.check(pos, expected); - - if (!from_exists && pos->type != TokenType::Comma) - { - ASTPtr identifier_node; - bool parsed_as = as_keyword_parser.ignore(pos, expected); - bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); - - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - expr_node->setAlias(getIdentifierName(identifier_node)); - - from_exists = from_keyword_parser.check(pos, expected); - } - - if (pos->type == TokenType::Comma) - { - if (from_exists) - return false; - - ++pos; - } - - if (!ParserExpression().parse(pos, start_node, expected)) - return false; - - auto for_keyword_parser = ParserKeyword("FOR"); - bool for_exists = for_keyword_parser.check(pos, expected); - if (!for_exists && pos->type != TokenType::Comma) - { - ASTPtr identifier_node; - bool parsed_as = as_keyword_parser.ignore(pos, expected); - bool parsed_identifer = identifier_parser.parse(pos, identifier_node, expected); - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - start_node->setAlias(getIdentifierName(identifier_node)); - - for_exists = for_keyword_parser.check(pos, expected); - } - - bool need_parse_length_expression = for_exists; - if (pos->type == TokenType::Comma) - { - if (for_exists) - return false; - - ++pos; - need_parse_length_expression = true; - } - - if (need_parse_length_expression - && !ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, length_node, expected)) - return false; - - /// Convert to canonical representation in functional form: SUBSTRING(expr, start, length) - if (length_node) - node = makeASTFunction("substring", expr_node, start_node, length_node); - else - node = makeASTFunction("substring", expr_node, start_node); - - return true; - } - - bool parseTrim(bool trim_left, bool trim_right, IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// Handles all possible TRIM/LTRIM/RTRIM call variants ([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) - - std::string func_name; - bool char_override = false; - ASTPtr expr_node; - ASTPtr pattern_node; - ASTPtr to_remove; - - if (!trim_left && !trim_right) - { - if (ParserKeyword("BOTH").ignore(pos, expected)) - { - trim_left = true; - trim_right = true; - char_override = true; - } - else if (ParserKeyword("LEADING").ignore(pos, expected)) - { - trim_left = true; - char_override = true; - } - else if (ParserKeyword("TRAILING").ignore(pos, expected)) - { - trim_right = true; - char_override = true; - } - else - { - trim_left = true; - trim_right = true; - } - - if (char_override) - { - if (!ParserExpression().parse(pos, to_remove, expected)) - return false; - - auto from_keyword_parser = ParserKeyword("FROM"); - bool from_exists = from_keyword_parser.check(pos, expected); - - if (!from_exists) - { - ASTPtr identifier_node; - bool parsed_as = ParserKeyword("AS").ignore(pos, expected); - bool parsed_identifer = ParserIdentifier().parse(pos, identifier_node, expected); - - if (parsed_as && !parsed_identifer) - return false; - - if (parsed_identifer) - to_remove->setAlias(getIdentifierName(identifier_node)); - - from_exists = from_keyword_parser.check(pos, expected); - } - - if (!from_exists) - 
return false; - - auto quote_meta_func_node = std::make_shared(); - auto quote_meta_list_args = std::make_shared(); - quote_meta_list_args->children = {to_remove}; - - quote_meta_func_node->name = "regexpQuoteMeta"; - quote_meta_func_node->arguments = std::move(quote_meta_list_args); - quote_meta_func_node->children.push_back(quote_meta_func_node->arguments); - - to_remove = std::move(quote_meta_func_node); - } - } - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, expr_node, expected)) - return false; - - /// Convert to regexp replace function call - - if (char_override) - { - auto pattern_func_node = std::make_shared(); - auto pattern_list_args = std::make_shared(); - if (trim_left && trim_right) - { - pattern_list_args->children - = {std::make_shared("^["), - to_remove, - std::make_shared("]+|["), - to_remove, - std::make_shared("]+$")}; - func_name = "replaceRegexpAll"; - } - else - { - if (trim_left) - { - pattern_list_args->children = {std::make_shared("^["), to_remove, std::make_shared("]+")}; - } - else - { - /// trim_right == false not possible - pattern_list_args->children = {std::make_shared("["), to_remove, std::make_shared("]+$")}; - } - func_name = "replaceRegexpOne"; - } - - pattern_func_node->name = "concat"; - pattern_func_node->arguments = std::move(pattern_list_args); - pattern_func_node->children.push_back(pattern_func_node->arguments); - - pattern_node = std::move(pattern_func_node); - } - else - { - if (trim_left && trim_right) - { - func_name = "trimBoth"; - } - else - { - if (trim_left) - { - func_name = "trimLeft"; - } - else - { - /// trim_right == false not possible - func_name = "trimRight"; - } - } - } - - if (char_override) - node = makeASTFunction(func_name, expr_node, pattern_node, std::make_shared("")); - else - node = makeASTFunction(func_name, expr_node); - return true; - } - - bool parseExtract(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First try to match with date extract operator EXTRACT(part FROM date) - /// Then with function extract(haystack, pattern) - - IParser::Pos begin = pos; - IntervalKind interval_kind; - - if (parseIntervalKind(pos, expected, interval_kind)) - { - ASTPtr expr; - - ParserKeyword s_from("FROM"); - ParserExpressionWithOptionalAlias elem_parser(true /*allow_alias_without_as_keyword*/); - - if (s_from.ignore(pos, expected) && elem_parser.parse(pos, expr, expected)) - { - node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), expr); - return true; - } - } - - pos = begin; - - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = "extract"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - bool parsePosition(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First try to match with position(needle IN haystack) - /// Then with position(haystack, needle[, start_pos]) - - ParserExpressionWithOptionalAlias expr_parser(true /*allow_alias_without_as_keyword*/); - - ASTPtr first_arg_expr_node; - if (!expr_parser.parse(pos, first_arg_expr_node, expected)) - { - return false; - } - - ASTFunction * func_in = typeid_cast(first_arg_expr_node.get()); - if (func_in && func_in->name == "in") - { - ASTExpressionList * in_args = typeid_cast(func_in->arguments.get()); - if (in_args && in_args->children.size() == 2) - { - node = 
makeASTFunction("position", in_args->children[1], in_args->children[0]); - return true; - } - } - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - ASTPtr second_arg_expr_node; - if (!expr_parser.parse(pos, second_arg_expr_node, expected)) - { - return false; - } - - ASTPtr start_pos_expr_node; - if (pos->type == TokenType::Comma) - { - ++pos; - - if (!expr_parser.parse(pos, start_pos_expr_node, expected)) - return false; - } - - auto arguments = std::make_shared(); - arguments->children.push_back(std::move(first_arg_expr_node)); - arguments->children.push_back(std::move(second_arg_expr_node)); - - if (start_pos_expr_node) - arguments->children.push_back(std::move(start_pos_expr_node)); - - auto res = std::make_shared(); - res->name = "position"; - res->arguments = std::move(arguments); - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - bool parseDateAdd(const char * function_name, IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First to match with function(unit, offset, timestamp) - /// Then with function(offset, timestamp) - - ASTPtr timestamp_node; - ASTPtr offset_node; - - IntervalKind interval_kind; - ASTPtr interval_func_node; - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, offset_node, expected)) - return false; - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timestamp_node, expected)) - return false; - auto interval_expr_list_args = std::make_shared(); - interval_expr_list_args->children = {offset_node}; - - interval_func_node = std::make_shared(); - interval_func_node->as().name = interval_kind.toNameOfFunctionToIntervalDataType(); - interval_func_node->as().arguments = std::move(interval_expr_list_args); - interval_func_node->as().children.push_back(interval_func_node->as().arguments); - } - else - { - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = function_name; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } - - node = makeASTFunction(function_name, timestamp_node, interval_func_node); - return true; - } - - bool parseDateDiff(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - /// First to match with dateDiff(unit, startdate, enddate, [timezone]) - /// Then with dateDiff('unit', startdate, enddate, [timezone]) - - ASTPtr left_node; - ASTPtr right_node; - - IntervalKind interval_kind; - if (parseIntervalKind(pos, expected, interval_kind)) - { - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, left_node, expected)) - return false; - - if (pos->type != TokenType::Comma) - return false; - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, right_node, expected)) - return false; - - ASTPtr timezone_node; - - if (pos->type == TokenType::Comma) - { - /// Optional timezone - ++pos; - - if (!ParserExpressionWithOptionalAlias(true /*allow_alias_without_as_keyword*/).parse(pos, timezone_node, expected)) - return false; - } - - auto interval_literal = 
std::make_shared(interval_kind.toDateDiffUnit()); - if (timezone_node) - node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node), std::move(timezone_node)); - else - node = makeASTFunction("dateDiff", std::move(interval_literal), std::move(left_node), std::move(right_node)); - - return true; - } - - ASTPtr expr_list; - if (!ParserExpressionList(true /*allow_alias_without_as_keyword*/).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = "dateDiff"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - - return true; - } - - bool parseExists(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) - return false; - - auto subquery = std::make_shared(); - subquery->children.push_back(node); - node = makeASTFunction("exists", subquery); - return true; - } - - bool parseGrouping(IParser::Pos & pos, ASTPtr & node, Expected & expected) - { - ASTPtr expr_list; - if (!ParserExpressionList(false, false).parse(pos, expr_list, expected)) - return false; - - auto res = std::make_shared(); - res->name = "grouping"; - res->arguments = expr_list; - res->children.push_back(res->arguments); - node = std::move(res); - return true; - } -} - - -bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserIdentifier id_parser; - - bool has_all = false; - bool has_distinct = false; - - ASTPtr identifier; - ASTPtr query; - ASTPtr expr_list_args; - ASTPtr expr_list_params; - - if (is_table_function) - { - if (ParserTableFunctionView().parse(pos, node, expected)) - return true; - } - - if (!id_parser.parse(pos, identifier, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - ++pos; - - /// Avoid excessive backtracking. - //pos.putBarrier(); - - /// Special cases for expressions that look like functions but contain some syntax sugar: - - /// CAST, EXTRACT, POSITION, EXISTS - /// DATE_ADD, DATEADD, TIMESTAMPADD, DATE_SUB, DATESUB, TIMESTAMPSUB, - /// DATE_DIFF, DATEDIFF, TIMESTAMPDIFF, TIMESTAMP_DIFF, - /// SUBSTRING, TRIM, LTRIM, RTRIM, POSITION - - /// Can be parsed as a composition of functions, but the contents must be unwrapped: - /// POSITION(x IN y) -> POSITION(in(x, y)) -> POSITION(y, x) - - /// Can be parsed as a function, but not always: - /// CAST(x AS type) - alias has to be unwrapped - /// CAST(x AS type(params)) - - /// Can be parsed as a function, but some identifier arguments have special meanings. 
- /// DATE_ADD(MINUTE, x, y) -> addMinutes(x, y) - /// DATE_DIFF(MINUTE, x, y) - - /// Have keywords that have to processed explicitly: - /// EXTRACT(x FROM y) - /// TRIM(BOTH|LEADING|TRAILING x FROM y) - /// SUBSTRING(x FROM a) - /// SUBSTRING(x FROM a FOR b) - - String function_name = getIdentifierName(identifier); - String function_name_lowercase = Poco::toLower(function_name); - - std::optional parsed_special_function; - - if (function_name_lowercase == "cast") - parsed_special_function = parseCastAs(pos, node, expected); - else if (function_name_lowercase == "extract") - parsed_special_function = parseExtract(pos, node, expected); - else if (function_name_lowercase == "substring") - parsed_special_function = parseSubstring(pos, node, expected); - else if (function_name_lowercase == "position") - parsed_special_function = parsePosition(pos, node, expected); - else if (function_name_lowercase == "exists") - parsed_special_function = parseExists(pos, node, expected); - else if (function_name_lowercase == "trim") - parsed_special_function = parseTrim(false, false, pos, node, expected); - else if (function_name_lowercase == "ltrim") - parsed_special_function = parseTrim(true, false, pos, node, expected); - else if (function_name_lowercase == "rtrim") - parsed_special_function = parseTrim(false, true, pos, node, expected); - else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add" - || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add") - parsed_special_function = parseDateAdd("plus", pos, node, expected); - else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub" - || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub") - parsed_special_function = parseDateAdd("minus", pos, node, expected); - else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff" - || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff") - parsed_special_function = parseDateDiff(pos, node, expected); - else if (function_name_lowercase == "grouping") - parsed_special_function = parseGrouping(pos, node, expected); - - if (parsed_special_function.has_value()) - return parsed_special_function.value() && ParserToken(TokenType::ClosingRoundBracket).ignore(pos); - - auto pos_after_bracket = pos; - auto old_expected = expected; - - ParserKeyword all("ALL"); - ParserKeyword distinct("DISTINCT"); - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_all = false; - has_distinct = false; - } - } - - ParserExpressionList contents(false, is_table_function); - - const char * contents_begin = pos->begin; - if (!contents.parse(pos, expr_list_args, expected)) - return false; - const char * contents_end = pos->begin; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - - /** Check for a common error case - often due to the complexity of quoting command-line arguments, - * an expression of the form toDate(2014-01-01) appears in the query instead of 
toDate('2014-01-01'). - * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, - * and the query silently returns an unexpected result. - */ - if (function_name == "toDate" - && contents_end - contents_begin == strlen("2014-01-01") - && contents_begin[0] >= '2' && contents_begin[0] <= '3' - && contents_begin[1] >= '0' && contents_begin[1] <= '9' - && contents_begin[2] >= '0' && contents_begin[2] <= '9' - && contents_begin[3] >= '0' && contents_begin[3] <= '9' - && contents_begin[4] == '-' - && contents_begin[5] >= '0' && contents_begin[5] <= '9' - && contents_begin[6] >= '0' && contents_begin[6] <= '9' - && contents_begin[7] == '-' - && contents_begin[8] >= '0' && contents_begin[8] <= '9' - && contents_begin[9] >= '0' && contents_begin[9] <= '9') - { - std::string contents_str(contents_begin, contents_end - contents_begin); - throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" - , ErrorCodes::SYNTAX_ERROR); - } - - /// The parametric aggregate function has two lists (parameters and arguments) in parentheses. Example: quantile(0.9)(x). - if (allow_function_parameters && pos->type == TokenType::OpeningRoundBracket) - { - ++pos; - - /// Parametric aggregate functions cannot have DISTINCT in parameters list. - if (has_distinct) - return false; - - expr_list_params = expr_list_args; - expr_list_args = nullptr; - - pos_after_bracket = pos; - old_expected = expected; - - if (all.ignore(pos, expected)) - has_all = true; - - if (distinct.ignore(pos, expected)) - has_distinct = true; - - if (!has_all && all.ignore(pos, expected)) - has_all = true; - - if (has_all && has_distinct) - return false; - - if (has_all || has_distinct) - { - /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treat as identifier - if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket) - { - pos = pos_after_bracket; - expected = old_expected; - has_distinct = false; - } - } - - if (!contents.parse(pos, expr_list_args, expected)) - return false; - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - ++pos; - } - - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - - /// func(DISTINCT ...) is equivalent to funcDistinct(...) - if (has_distinct) - function_node->name += "Distinct"; - - function_node->arguments = expr_list_args; - function_node->children.push_back(function_node->arguments); - - if (expr_list_params) - { - function_node->parameters = expr_list_params; - function_node->children.push_back(function_node->parameters); - } - - ParserKeyword filter("FILTER"); - ParserKeyword over("OVER"); - - if (filter.ignore(pos, expected)) - { - // We are slightly breaking the parser interface by parsing the window - // definition into an existing ASTFunction. Normally it would take a - // reference to ASTPtr and assign it the new node. We only have a pointer - // of a different type, hence this workaround with a temporary pointer. 
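An aside on the "temporary pointer" workaround described in the comment above: a `std::shared_ptr<ASTFunction>` cannot bind to a `std::shared_ptr<IAST> &` parameter, even though the pointee types are related, so a base-typed handle has to be introduced first. A minimal standalone sketch (stub `IAST`/`ASTFunction` types and a hypothetical `parseInto` stand in for the real parser API):

```cpp
#include <cassert>
#include <memory>

struct IAST { virtual ~IAST() = default; };
struct ASTFunction : IAST { bool is_window_function = false; };

using ASTPtr = std::shared_ptr<IAST>;

// Parser-style API: writes through a reference to the *base* pointer type.
bool parseInto(ASTPtr & node) { return node != nullptr; }

int main()
{
    auto function_node = std::make_shared<ASTFunction>();

    // parseInto(function_node) would not compile: std::shared_ptr<ASTFunction>
    // does not convert to std::shared_ptr<IAST> & (only to a temporary value).
    ASTPtr function_node_as_iast = function_node;  // the workaround: a base-typed copy
    assert(parseInto(function_node_as_iast));

    // Both handles share ownership of the same node, so anything the parser
    // attaches through the base-typed handle is visible through function_node.
    assert(function_node_as_iast.get() == function_node.get());
}
```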
- ASTPtr function_node_as_iast = function_node; - - ParserFilterClause filter_parser; - if (!filter_parser.parse(pos, function_node_as_iast, expected)) - return false; - } - - if (over.ignore(pos, expected)) - { - function_node->is_window_function = true; - - ASTPtr function_node_as_iast = function_node; - - ParserWindowReference window_reference; - if (!window_reference.parse(pos, function_node_as_iast, expected)) - return false; - } - - node = function_node; - return true; -} - -bool ParserTableFunctionView::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserIdentifier id_parser; - ParserSelectWithUnionQuery select; - - ASTPtr identifier; - ASTPtr query; - - bool if_permitted = false; - - if (ParserKeyword{"VIEWIFPERMITTED"}.ignore(pos, expected)) - if_permitted = true; - else if (!ParserKeyword{"VIEW"}.ignore(pos, expected)) - return false; - - if (pos->type != TokenType::OpeningRoundBracket) - return false; - - ++pos; - - bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket; - - if (!select.parse(pos, query, expected)) - return false; - - auto & select_ast = query->as(); - if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery) - { - // It's an subquery. Bail out. - return false; - } - - ASTPtr else_ast; - if (if_permitted) - { - if (!ParserKeyword{"ELSE"}.ignore(pos, expected)) - return false; - - if (!ParserWithOptionalAlias{std::make_unique(true, true), true}.parse(pos, else_ast, expected)) - return false; - } - - if (pos->type != TokenType::ClosingRoundBracket) - return false; - - ++pos; - - auto expr_list = std::make_shared(); - expr_list->children.push_back(query); - if (if_permitted) - expr_list->children.push_back(else_ast); - - auto function_node = std::make_shared(); - tryGetIdentifierNameInto(identifier, function_node->name); - function_node->name = if_permitted ? "viewIfPermitted" : "view"; - function_node->arguments = expr_list; - function_node->children.push_back(function_node->arguments); - node = function_node; - return true; -} - bool ParserFilterClause::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { assert(node); @@ -1865,42 +942,63 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return true; } +template +struct CollectionOfLiteralsLayer +{ + explicit CollectionOfLiteralsLayer(IParser::Pos & pos) : literal_begin(pos) + { + ++pos; + } + + IParser::Pos literal_begin; + Collection arr; +}; + template bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != opening_bracket) return false; - Pos literal_begin = pos; + std::vector> layers; + layers.emplace_back(pos); + pos.increaseDepth(); - Collection arr; ParserLiteral literal_p; - ParserCollectionOfLiterals collection_p(opening_bracket, closing_bracket); - ++pos; while (pos.isValid()) { - if (!arr.empty()) + if (!layers.back().arr.empty()) { if (pos->type == closing_bracket) { std::shared_ptr literal; /// Parse one-element tuples (e.g. (1)) later as single values for backward compatibility. 
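The `CollectionOfLiteralsLayer` stack introduced above replaces the old recursive `collection_p` call, so nesting depth is tracked explicitly (`increaseDepth`/`decreaseDepth`) instead of consuming native call stack. A self-contained toy of the same technique, assuming a simplified grammar of bracketed integer lists and a hypothetical `parseNestedSum` that folds each finished layer into its parent:

```cpp
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Each open bracket pushes a layer; each close bracket folds the layer's
// contents into its parent, exactly like the ASTLiteral folding above.
struct ToyLayer { std::vector<int> arr; };

bool parseNestedSum(const std::string & s, int & out)  // sums all numbers
{
    std::vector<ToyLayer> layers;
    size_t i = 0;
    if (s.empty() || s[i] != '[')
        return false;
    layers.push_back({}); ++i;
    while (i < s.size() && !layers.empty())
    {
        char c = s[i];
        if (c == '[') { layers.push_back({}); ++i; }
        else if (c == ']')
        {
            int sum = 0;
            for (int v : layers.back().arr) sum += v;
            layers.pop_back(); ++i;
            if (layers.empty()) { out = sum; return i == s.size(); }
            layers.back().arr.push_back(sum);  // fold finished layer into parent
        }
        else if (c == ',') ++i;
        else if (std::isdigit(static_cast<unsigned char>(c)))
        {
            int v = 0;
            while (i < s.size() && std::isdigit(static_cast<unsigned char>(s[i])))
                v = v * 10 + (s[i++] - '0');
            layers.back().arr.push_back(v);
        }
        else return false;
    }
    return false;
}

int main()
{
    int sum = 0;
    std::cout << parseNestedSum("[1,[2,[3]],4]", sum) << ' ' << sum << '\n';  // 1 10
}
```

The real parser additionally checks token types and tuple arity; the point here is only the stack-of-layers shape that bounds recursion.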
- if (std::is_same_v && arr.size() == 1) + if (std::is_same_v && layers.back().arr.size() == 1) return false; - literal = std::make_shared(std::move(arr)); - literal->begin = literal_begin; + literal = std::make_shared(std::move(layers.back().arr)); + literal->begin = layers.back().literal_begin; literal->end = ++pos; - node = literal; - return true; + + layers.pop_back(); + pos.decreaseDepth(); + + if (layers.empty()) + { + node = literal; + return true; + } + + layers.back().arr.push_back(literal->value); + continue; } else if (pos->type == TokenType::Comma) { ++pos; } - else if (pos->type == TokenType::Colon && std::is_same_v && arr.size() % 2 == 1) + else if (pos->type == TokenType::Colon && std::is_same_v && layers.back().arr.size() % 2 == 1) { ++pos; } @@ -1912,10 +1010,17 @@ bool ParserCollectionOfLiterals::parseImpl(Pos & pos, ASTPtr & node, } ASTPtr literal_node; - if (!literal_p.parse(pos, literal_node, expected) && !collection_p.parse(pos, literal_node, expected)) + if (literal_p.parse(pos, literal_node, expected)) + { + layers.back().arr.push_back(literal_node->as().value); + } + else if (pos->type == opening_bracket) + { + layers.emplace_back(pos); + pos.increaseDepth(); + } + else return false; - - arr.push_back(literal_node->as().value); } expected.add(pos, getTokenName(closing_bracket)); @@ -2093,7 +1198,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e ASTPtr func_name; ASTPtr expr_list_args; auto opos = pos; - if (ParserLambdaExpression().parse(pos, lambda, expected)) + if (ParserExpression().parse(pos, lambda, expected)) { if (const auto * func = lambda->as(); func && func->name == "lambda") { @@ -2396,41 +1501,13 @@ bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & e } auto name_literal = std::make_shared(name); - - auto expr_list_args = std::make_shared(); - expr_list_args->children.push_back(std::move(name_literal)); - - auto function_node = std::make_shared(); - function_node->name = "globalVariable"; - function_node->arguments = expr_list_args; - function_node->children.push_back(expr_list_args); - - node = function_node; + node = makeASTFunction("globalVariable", name_literal); node->setAlias("@@" + name); + return true; } -bool ParserExpressionElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - return ParserSubquery().parse(pos, node, expected) - || ParserCastOperator().parse(pos, node, expected) - || ParserTupleOfLiterals().parse(pos, node, expected) - || ParserParenthesisExpression().parse(pos, node, expected) - || ParserArrayOfLiterals().parse(pos, node, expected) - || ParserArray().parse(pos, node, expected) - || ParserLiteral().parse(pos, node, expected) - || ParserCase().parse(pos, node, expected) - || ParserColumnsMatcher().parse(pos, node, expected) /// before ParserFunction because it can be also parsed as a function. 
- || ParserFunction().parse(pos, node, expected) - || ParserQualifiedAsterisk().parse(pos, node, expected) - || ParserAsterisk().parse(pos, node, expected) - || ParserCompoundIdentifier(false, true).parse(pos, node, expected) - || ParserSubstitution().parse(pos, node, expected) - || ParserMySQLGlobalVariable().parse(pos, node, expected); -} - - bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (!elem_parser->parse(pos, node, expected)) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 3883631b61c..f538555f0c1 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -9,26 +9,6 @@ namespace DB { -class ParserArray : public IParserBase -{ -protected: - const char * getName() const override { return "array"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -/** If in parenthesis an expression from one element - returns this element in `node`; - * or if there is a SELECT subquery in parenthesis, then this subquery returned in `node`; - * otherwise returns `tuple` function from the contents of brackets. - */ -class ParserParenthesisExpression : public IParserBase -{ -protected: - const char * getName() const override { return "parenthesized expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - /** The SELECT subquery is in parenthesis. */ class ParserSubquery : public IParserBase @@ -141,36 +121,6 @@ protected: ColumnTransformers allowed_transformers; }; -/** A function, for example, f(x, y + 1, g(z)). - * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function. - * Or a parametric aggregate function: quantile(0.9)(x + y). - * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments. - * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y). - */ -class ParserFunction : public IParserBase -{ -public: - explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) - : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) - { - } - -protected: - const char * getName() const override { return "function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - bool allow_function_parameters; - bool is_table_function; -}; - -// A special function parser for view and viewIfPermitted table functions. -// It parses an SELECT query as its argument and doesn't support getColumnName(). -class ParserTableFunctionView : public IParserBase -{ -protected: - const char * getName() const override { return "function"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - // Allows to make queries like SELECT SUM() FILTER(WHERE ) FROM ... class ParserFilterClause : public IParserBase { @@ -394,16 +344,6 @@ protected: }; -/** The expression element is one of: an expression in parentheses, an array, a literal, a function, an identifier, an asterisk. - */ -class ParserExpressionElement : public IParserBase -{ -protected: - const char * getName() const override { return "element of expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - /** An expression element, possibly with an alias, if appropriate. 
*/ class ParserWithOptionalAlias : public IParserBase diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..f7a016a59e4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -19,80 +19,21 @@ #include #include +#include + +#include +#include + using namespace std::literals; namespace DB { -const char * ParserMultiplicativeExpression::operators[] = +namespace ErrorCodes { - "*", "multiply", - "/", "divide", - "%", "modulo", - "MOD", "modulo", - "DIV", "intDiv", - nullptr -}; - -const char * ParserUnaryExpression::operators[] = -{ - "-", "negate", - "NOT", "not", - nullptr -}; - -const char * ParserAdditiveExpression::operators[] = -{ - "+", "plus", - "-", "minus", - nullptr -}; - -const char * ParserComparisonExpression::operators[] = -{ - "==", "equals", - "!=", "notEquals", - "<>", "notEquals", - "<=", "lessOrEquals", - ">=", "greaterOrEquals", - "<", "less", - ">", "greater", - "=", "equals", - "LIKE", "like", - "ILIKE", "ilike", - "NOT LIKE", "notLike", - "NOT ILIKE", "notILike", - "IN", "in", - "NOT IN", "notIn", - "GLOBAL IN", "globalIn", - "GLOBAL NOT IN", "globalNotIn", - nullptr -}; - -const char * ParserComparisonExpression::overlapping_operators_to_skip[] = -{ - "IN PARTITION", - nullptr -}; - -const char * ParserLogicalNotExpression::operators[] = -{ - "NOT", "not", - nullptr -}; - -const char * ParserArrayElementExpression::operators[] = -{ - "[", "arrayElement", - nullptr -}; - -const char * ParserTupleElementExpression::operators[] = -{ - ".", "tupleElement", - nullptr -}; + extern const int SYNTAX_ERROR; +} bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) @@ -298,7 +239,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (first) { ASTPtr elem; - if (!first_elem_parser->parse(pos, elem, expected)) + if (!elem_parser->parse(pos, elem, expected)) return false; node = elem; @@ -307,16 +248,7 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node else { /// try to find any of the valid operators - const char ** it; - Expected stub; - for (it = overlapping_operators_to_skip; *it; ++it) - if (ParserKeyword{*it}.checkWithoutMoving(pos, stub)) - break; - - if (*it) - break; - for (it = operators; *it; it += 2) if (parseOperator(pos, *it, expected)) break; @@ -324,40 +256,13 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node if (!*it) break; - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - ASTPtr elem; - SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE; - if (comparison_expression) - { - if (ParserKeyword("ANY").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ANY; - else if (ParserKeyword("ALL").ignore(pos, expected)) - subquery_function_type = SubqueryFunctionType::ALL; - } - - if (subquery_function_type != SubqueryFunctionType::NONE && !ParserSubquery().parse(pos, elem, expected)) - subquery_function_type = SubqueryFunctionType::NONE; - - if (subquery_function_type == SubqueryFunctionType::NONE - && !(remaining_elem_parser ? 
remaining_elem_parser : first_elem_parser)->parse(pos, elem, expected)) + if (!elem_parser->parse(pos, elem, expected)) return false; /// the first argument of the function is the previous element, the second is the next one - function->name = it[1]; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(node); - exp_list->children.push_back(elem); - - if (comparison_expression && subquery_function_type != SubqueryFunctionType::NONE && !modifyAST(function, subquery_function_type)) - return false; + auto function = makeASTFunction(it[1], node, elem); /** special exception for the access operator to the element of the array `x[y]`, which * contains the infix part '[' and the suffix ''] '(specified as' [') @@ -381,369 +286,24 @@ bool ParserLeftAssociativeBinaryOperatorList::parseImpl(Pos & pos, ASTPtr & node } -bool ParserVariableArityOperatorList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +ASTPtr makeBetweenOperator(bool negative, ASTs arguments) { - ASTPtr arguments; + // SUBJECT = arguments[0], LEFT = arguments[1], RIGHT = arguments[2] - if (!elem_parser->parse(pos, node, expected)) - return false; - - while (true) + if (negative) { - if (!parseOperator(pos, infix, expected)) - break; - - if (!arguments) - { - node = makeASTFunction(function_name, node); - arguments = node->as().arguments; - } - - ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - return false; - - arguments->children.push_back(elem); - } - - return true; -} - -bool ParserBetweenExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// For the expression (subject [NOT] BETWEEN left AND right) - /// create an AST the same as for (subject >= left AND subject <= right). - - ParserKeyword s_not("NOT"); - ParserKeyword s_between("BETWEEN"); - ParserKeyword s_and("AND"); - - ASTPtr subject; - ASTPtr left; - ASTPtr right; - - if (!elem_parser.parse(pos, subject, expected)) - return false; - - bool negative = s_not.ignore(pos, expected); - - if (!s_between.ignore(pos, expected)) - { - if (negative) - --pos; - - /// No operator was parsed, just return element. 
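The new `makeBetweenOperator` helper re-expresses `subject [NOT] BETWEEN left AND right` through plain comparison functions, as the removed `ParserBetweenExpression` used to. A standalone check of the Boolean identities the rewrite depends on:

```cpp
#include <cassert>

// x BETWEEN a AND b      <=>  x >= a AND x <= b   (greaterOrEquals / lessOrEquals / and)
// x NOT BETWEEN a AND b  <=>  x < a  OR  x > b    (less / greater / or)
bool between(int x, int a, int b)    { return x >= a && x <= b; }
bool notBetween(int x, int a, int b) { return x < a || x > b; }

int main()
{
    for (int x = -5; x <= 5; ++x)
        assert(notBetween(x, -2, 3) == !between(x, -2, 3));
}
```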
- node = subject; + auto f_left_expr = makeASTFunction("less", arguments[0], arguments[1]); + auto f_right_expr = makeASTFunction("greater", arguments[0], arguments[2]); + return makeASTFunction("or", f_left_expr, f_right_expr); } else { - if (!elem_parser.parse(pos, left, expected)) - return false; - - if (!s_and.ignore(pos, expected)) - return false; - - if (!elem_parser.parse(pos, right, expected)) - return false; - - auto f_combined_expression = std::make_shared(); - auto args_combined_expression = std::make_shared(); - - /// [NOT] BETWEEN left AND right - auto f_left_expr = std::make_shared(); - auto args_left_expr = std::make_shared(); - - auto f_right_expr = std::make_shared(); - auto args_right_expr = std::make_shared(); - - args_left_expr->children.emplace_back(subject); - args_left_expr->children.emplace_back(left); - - args_right_expr->children.emplace_back(subject); - args_right_expr->children.emplace_back(right); - - if (negative) - { - /// NOT BETWEEN - f_left_expr->name = "less"; - f_right_expr->name = "greater"; - f_combined_expression->name = "or"; - } - else - { - /// BETWEEN - f_left_expr->name = "greaterOrEquals"; - f_right_expr->name = "lessOrEquals"; - f_combined_expression->name = "and"; - } - - f_left_expr->arguments = args_left_expr; - f_left_expr->children.emplace_back(f_left_expr->arguments); - - f_right_expr->arguments = args_right_expr; - f_right_expr->children.emplace_back(f_right_expr->arguments); - - args_combined_expression->children.emplace_back(f_left_expr); - args_combined_expression->children.emplace_back(f_right_expr); - - f_combined_expression->arguments = args_combined_expression; - f_combined_expression->children.emplace_back(f_combined_expression->arguments); - - node = f_combined_expression; + auto f_left_expr = makeASTFunction("greaterOrEquals", arguments[0], arguments[1]); + auto f_right_expr = makeASTFunction("lessOrEquals", arguments[0], arguments[2]); + return makeASTFunction("and", f_left_expr, f_right_expr); } - - return true; } -bool ParserTernaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserToken symbol1(TokenType::QuestionMark); - ParserToken symbol2(TokenType::Colon); - - ASTPtr elem_cond; - ASTPtr elem_then; - ASTPtr elem_else; - - if (!elem_parser.parse(pos, elem_cond, expected)) - return false; - - if (!symbol1.ignore(pos, expected)) - node = elem_cond; - else - { - if (!elem_parser.parse(pos, elem_then, expected)) - return false; - - if (!symbol2.ignore(pos, expected)) - return false; - - if (!elem_parser.parse(pos, elem_else, expected)) - return false; - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - function->name = "if"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(elem_cond); - exp_list->children.push_back(elem_then); - exp_list->children.push_back(elem_else); - - node = function; - } - - return true; -} - - -bool ParserLambdaExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ParserToken arrow(TokenType::Arrow); - ParserToken open(TokenType::OpeningRoundBracket); - ParserToken close(TokenType::ClosingRoundBracket); - - Pos begin = pos; - - do - { - ASTPtr inner_arguments; - ASTPtr expression; - - bool was_open = false; - - if (open.ignore(pos, expected)) - { - was_open = true; - } - - if (!ParserList(std::make_unique(), std::make_unique(TokenType::Comma)).parse(pos, inner_arguments, expected)) - 
break; - - if (was_open) - { - if (!close.ignore(pos, expected)) - break; - } - - if (!arrow.ignore(pos, expected)) - break; - - if (!elem_parser.parse(pos, expression, expected)) - return false; - - /// lambda(tuple(inner_arguments), expression) - - auto lambda = std::make_shared(); - node = lambda; - lambda->name = "lambda"; - - auto outer_arguments = std::make_shared(); - lambda->arguments = outer_arguments; - lambda->children.push_back(lambda->arguments); - - auto tuple = std::make_shared(); - outer_arguments->children.push_back(tuple); - tuple->name = "tuple"; - tuple->arguments = inner_arguments; - tuple->children.push_back(inner_arguments); - - outer_arguments->children.push_back(expression); - - return true; - } - while (false); - - pos = begin; - return elem_parser.parse(pos, node, expected); -} - - -bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - if (ParserTableFunctionView().parse(pos, node, expected)) - return true; - ParserKeyword s_settings("SETTINGS"); - if (s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (parser_settings.parse(pos, node, expected)) - return true; - } - return elem_parser.parse(pos, node, expected); -} - - -bool ParserPrefixUnaryOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// try to find any of the valid operators - const char ** it; - for (it = operators; *it; it += 2) - { - if (parseOperator(pos, *it, expected)) - break; - } - - /// Let's parse chains of the form `NOT NOT x`. This is hack. - /** This is done, because among the unary operators there is only a minus and NOT. - * But for a minus the chain of unary operators does not need to be supported. - */ - size_t count = 1; - if (it[0] && 0 == strncmp(it[0], "NOT", 3)) - { - while (true) - { - const char ** jt; - for (jt = operators; *jt; jt += 2) - if (parseOperator(pos, *jt, expected)) - break; - - if (!*jt) - break; - - ++count; - } - } - - ASTPtr elem; - if (!elem_parser->parse(pos, elem, expected)) - return false; - - if (!*it) - node = elem; - else - { - for (size_t i = 0; i < count; ++i) - { - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - function->name = it[1]; - function->arguments = exp_list; - function->children.push_back(exp_list); - - if (node) - exp_list->children.push_back(node); - else - exp_list->children.push_back(elem); - - node = function; - } - } - - return true; -} - - -bool ParserUnaryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator. 
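A standalone illustration of one reason this exception matters (an assumed motivating case; the comment itself does not name one): parsed as `negate(9223372036854775808)`, the most negative Int64 would require an intermediate positive literal that does not fit into Int64 at all, whereas parsing `-9223372036854775808` as a single literal stays representable:

```cpp
#include <cstdint>
#include <iostream>
#include <limits>

int main()
{
    std::int64_t min64 = std::numeric_limits<std::int64_t>::min();
    std::cout << min64 << '\n';  // -9223372036854775808: fine as one literal
    // C++ has the same wrinkle: "-9223372036854775808" is unary minus applied
    // to a literal that overflows long long, which is why INT64_MIN is spelled:
    std::cout << (min64 == -9223372036854775807LL - 1) << '\n';  // 1
}
```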
- - if (pos->type == TokenType::Minus) - { - Pos begin = pos; - if (ParserCastOperator().parse(pos, node, expected)) - return true; - - pos = begin; - if (ParserLiteral().parse(pos, node, expected)) - return true; - - pos = begin; - } - - return operator_parser.parse(pos, node, expected); -} - - -bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr expr_ast; - if (!elem_parser->parse(pos, expr_ast, expected)) - return false; - - ASTPtr type_ast; - if (ParserToken(TokenType::DoubleColon).ignore(pos, expected) - && ParserDataType().parse(pos, type_ast, expected)) - { - node = createFunctionCast(expr_ast, type_ast); - } - else - { - node = expr_ast; - } - - return true; -} - - -bool ParserArrayElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected &expected) -{ - return ParserLeftAssociativeBinaryOperatorList{ - operators, - std::make_unique(std::make_unique()), - std::make_unique(false) - }.parse(pos, node, expected); -} - - -bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected &expected) -{ - return ParserLeftAssociativeBinaryOperatorList{ - operators, - std::make_unique(std::make_unique()), - std::make_unique() - }.parse(pos, node, expected); -} - - ParserExpressionWithOptionalAlias::ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function) : impl(std::make_unique( is_table_function ? ParserPtr(std::make_unique()) : ParserPtr(std::make_unique()), @@ -760,7 +320,6 @@ bool ParserExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expect .parse(pos, node, expected); } - bool ParserNotEmptyExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return nested_parser.parse(pos, node, expected) && !node->children.empty(); @@ -836,179 +395,6 @@ bool ParserTTLExpressionList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp } -bool ParserNullityChecking::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - ASTPtr node_comp; - if (!elem_parser.parse(pos, node_comp, expected)) - return false; - - ParserKeyword s_is{"IS"}; - ParserKeyword s_not{"NOT"}; - ParserKeyword s_null{"NULL"}; - - if (s_is.ignore(pos, expected)) - { - bool is_not = false; - if (s_not.ignore(pos, expected)) - is_not = true; - - if (!s_null.ignore(pos, expected)) - return false; - - auto args = std::make_shared(); - args->children.push_back(node_comp); - - auto function = std::make_shared(); - function->name = is_not ? "isNotNull" : "isNull"; - function->arguments = args; - function->children.push_back(function->arguments); - - node = function; - } - else - node = node_comp; - - return true; -} - -bool ParserDateOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no DATE keyword, go to the nested parser. 
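A toy model of the DATE-operator rule handled here: the rewrite fires only when the `DATE` keyword is immediately followed by a string literal, and otherwise backtracks to the nested parser. `rewriteDateOperator` and the string-based tokens below are illustrative only:

```cpp
#include <iostream>
#include <optional>
#include <string>

// Rewrite DATE '<string literal>' -> toDate('<string literal>'); otherwise
// report "no match" so the caller can fall back to the nested parser.
std::optional<std::string> rewriteDateOperator(const std::string & kw, const std::string & next_token)
{
    if (kw != "DATE")
        return std::nullopt;  // no DATE keyword: nested parser handles it
    if (next_token.size() < 2 || next_token.front() != '\'' || next_token.back() != '\'')
        return std::nullopt;  // DATE not followed by a string literal: backtrack
    return "toDate(" + next_token + ")";
}

int main()
{
    std::cout << rewriteDateOperator("DATE", "'2014-01-01'").value_or("<fallback>") << '\n';
    std::cout << rewriteDateOperator("DATE", "x").value_or("<fallback>") << '\n';
}
```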
- if (!ParserKeyword("DATE").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = "toDate"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - -bool ParserTimestampOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no TIMESTAMP keyword, go to the nested parser. - if (!ParserKeyword("TIMESTAMP").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - if (!ParserStringLiteral().parse(pos, expr, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = "toDateTime"; - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - -bool ParserIntervalOperatorExpression::parseArgumentAndIntervalKind( - Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected) -{ - auto begin = pos; - auto init_expected = expected; - ASTPtr string_literal; - //// A String literal followed INTERVAL keyword, - /// the literal can be a part of an expression or - /// include Number and INTERVAL TYPE at the same time - if (ParserStringLiteral{}.parse(pos, string_literal, expected)) - { - String literal; - if (string_literal->as().value.tryGet(literal)) - { - Tokens tokens(literal.data(), literal.data() + literal.size()); - Pos token_pos(tokens, 0); - Expected token_expected; - - if (!ParserNumber{}.parse(token_pos, expr, token_expected)) - return false; - else - { - /// case: INTERVAL '1' HOUR - /// back to begin - if (!token_pos.isValid()) - { - pos = begin; - expected = init_expected; - } - else - /// case: INTERVAL '1 HOUR' - return parseIntervalKind(token_pos, token_expected, interval_kind); - } - } - } - // case: INTERVAL expr HOUR - if (!ParserExpressionWithOptionalAlias(false).parse(pos, expr, expected)) - return false; - return parseIntervalKind(pos, expected, interval_kind); -} - -bool ParserIntervalOperatorExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) -{ - auto begin = pos; - - /// If no INTERVAL keyword, go to the nested parser. 
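The removed `parseArgumentAndIntervalKind` covers three spellings: `INTERVAL 1 HOUR`, `INTERVAL '1' HOUR`, and `INTERVAL '1 HOUR'`, where the last packs the number and the unit into a single string literal that has to be re-tokenized. A simplified standalone model of that split (hypothetical `splitIntervalLiteral`):

```cpp
#include <iostream>
#include <sstream>
#include <string>

// If the literal starts with a number, the unit may either follow inside the
// same literal ('1 HOUR') or come as the next token outside it ('1' HOUR).
bool splitIntervalLiteral(const std::string & literal, long & amount, std::string & unit)
{
    std::istringstream in(literal);
    if (!(in >> amount))
        return false;   // not a number: treat the literal as an ordinary expression
    if (!(in >> unit))
        unit.clear();   // case INTERVAL '1' HOUR: unit must follow outside the literal
    return true;
}

int main()
{
    long n; std::string unit;
    splitIntervalLiteral("1 HOUR", n, unit);
    std::cout << n << " [" << unit << "]\n";  // 1 [HOUR]
    splitIntervalLiteral("1", n, unit);
    std::cout << n << " [" << unit << "]\n";  // 1 []
}
```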
- if (!ParserKeyword("INTERVAL").ignore(pos, expected)) - return next_parser.parse(pos, node, expected); - - ASTPtr expr; - IntervalKind interval_kind; - if (!parseArgumentAndIntervalKind(pos, expr, interval_kind, expected)) - { - pos = begin; - return next_parser.parse(pos, node, expected); - } - - /// the function corresponding to the operator - auto function = std::make_shared(); - - /// function arguments - auto exp_list = std::make_shared(); - - /// the first argument of the function is the previous element, the second is the next one - function->name = interval_kind.toNameOfFunctionToIntervalDataType(); - function->arguments = exp_list; - function->children.push_back(exp_list); - - exp_list->children.push_back(expr); - - node = function; - return true; -} - bool ParserKeyValuePair::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserIdentifier id_parser; @@ -1053,4 +439,2091 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } + +enum class Action +{ + NONE, + OPERAND, + OPERATOR +}; + +/** Operator types are needed for special handling of certain operators. + * Operators can be grouped into some type if they have similar behaviour. + * Certain operators are unique in terms of their behaviour, so they are assigned a separate type. + */ +enum class OperatorType +{ + None, + Comparison, + Mergeable, + ArrayElement, + TupleElement, + IsNull, + StartBetween, + StartNotBetween, + FinishBetween, + StartIf, + FinishIf, + Cast, + Lambda +}; + +/** Operator struct stores parameters of the operator: + * - function_name name of the function that operator will create + * - priority priority of the operator relative to the other operators + * - arity the amount of arguments that operator will consume + * - type type of the operator that defines its behaviour + */ +struct Operator +{ + Operator() = default; + + Operator(const std::string & function_name_, int priority_, int arity_ = 2, OperatorType type_ = OperatorType::None) + : type(type_), priority(priority_), arity(arity_), function_name(function_name_) {} + + OperatorType type; + int priority; + int arity; + std::string function_name; +}; + +enum class Checkpoint +{ + None, + Interval, + Case +}; + +/** Layer is a class that represents context for parsing certain element, + * that consists of other elements e.g. 
f(x1, x2, x3)
+ *
+ * - Manages operands and operators for the future elements (arguments)
+ * - Combines operands and operators into one element
+ * - Parses separators and endings
+ * - Combines resulting elements into a function
+ */
+
+class Layer
+{
+public:
+    explicit Layer(bool allow_alias_ = true, bool allow_alias_without_as_keyword_ = true) :
+        allow_alias(allow_alias_), allow_alias_without_as_keyword(allow_alias_without_as_keyword_)
+    {
+    }
+
+    virtual ~Layer() = default;
+
+    bool popOperator(Operator & op)
+    {
+        if (operators.empty())
+            return false;
+
+        op = std::move(operators.back());
+        operators.pop_back();
+
+        return true;
+    }
+
+    void pushOperator(Operator op)
+    {
+        operators.push_back(std::move(op));
+    }
+
+    bool popOperand(ASTPtr & op)
+    {
+        if (operands.empty())
+            return false;
+
+        op = std::move(operands.back());
+        operands.pop_back();
+
+        return true;
+    }
+
+    void pushOperand(ASTPtr op)
+    {
+        operands.push_back(std::move(op));
+    }
+
+    void pushResult(ASTPtr op)
+    {
+        elements.push_back(std::move(op));
+    }
+
+    virtual bool getResult(ASTPtr & node)
+    {
+        if (elements.size() == 1)
+        {
+            node = std::move(elements[0]);
+            return true;
+        }
+
+        return false;
+    }
+
+    virtual bool parse(IParser::Pos & /*pos*/, Expected & /*expected*/, Action & /*action*/) = 0;
+
+    bool isFinished() const
+    {
+        return finished;
+    }
+
+    int previousPriority() const
+    {
+        if (operators.empty())
+            return 0;
+
+        return operators.back().priority;
+    }
+
+    OperatorType previousType() const
+    {
+        if (operators.empty())
+            return OperatorType::None;
+
+        return operators.back().type;
+    }
+
+    bool isCurrentElementEmpty() const
+    {
+        return operators.empty() && operands.empty();
+    }
+
+    bool popLastNOperands(ASTs & asts, size_t n)
+    {
+        if (n > operands.size())
+            return false;
+
+        asts.reserve(asts.size() + n);
+
+        auto start = operands.begin() + operands.size() - n;
+        asts.insert(asts.end(), std::make_move_iterator(start), std::make_move_iterator(operands.end()));
+        operands.erase(start, operands.end());
+
+        return true;
+    }
+
+    /// Merge operators and operands into a single element (column), then push it to 'elements' vector.
+    /// Operators are previously sorted in ascending order of priority
+    /// (operator with priority 1 has higher priority than operator with priority 2),
+    /// so we can just merge them with operands starting from the end.
+    ///
+    /// If we fail here it means that the query was incorrect and we should return an error.
+ /// + bool mergeElement(bool push_to_elements = true) + { + Operator cur_op; + while (popOperator(cur_op)) + { + ASTPtr function; + + // Special case of ternary operator + if (cur_op.type == OperatorType::StartIf) + return false; + + if (cur_op.type == OperatorType::FinishIf) + { + Operator tmp; + if (!popOperator(tmp) || tmp.type != OperatorType::StartIf) + return false; + } + + // Special case of a BETWEEN b AND c operator + if (cur_op.type == OperatorType::StartBetween || cur_op.type == OperatorType::StartNotBetween) + return false; + + if (cur_op.type == OperatorType::FinishBetween) + { + Operator tmp_op; + if (!popOperator(tmp_op)) + return false; + + if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween) + return false; + + bool negative = tmp_op.type == OperatorType::StartNotBetween; + + ASTs arguments; + if (!popLastNOperands(arguments, 3)) + return false; + + function = makeBetweenOperator(negative, arguments); + } + else + { + function = makeASTFunction(cur_op.function_name); + + if (!popLastNOperands(function->children[0]->children, cur_op.arity)) + return false; + } + + pushOperand(function); + } + + ASTPtr node; + if (!popOperand(node)) + return false; + + bool res = isCurrentElementEmpty(); + + if (push_to_elements) + pushResult(node); + else + pushOperand(node); + + return res; + } + + bool parseLambda() + { + // 0. If empty - create function tuple with 0 args + if (isCurrentElementEmpty()) + { + auto function = makeASTFunction("tuple"); + pushOperand(function); + return true; + } + + if (operands.size() != 1 || !operators.empty() || !mergeElement()) + return false; + + /// 1. If there is already tuple do nothing + if (tryGetFunctionName(elements.back()) == "tuple") + { + pushOperand(elements.back()); + elements.pop_back(); + } + /// 2. Put all elements in a single tuple + else + { + auto function = makeASTFunction("tuple", elements); + elements.clear(); + pushOperand(function); + } + return true; + } + + /// Put 'node' identifier into the last operand as its alias + bool insertAlias(ASTPtr node) + { + if (!mergeElement(false)) + return false; + + if (operands.empty()) + return false; + + if (auto * ast_with_alias = dynamic_cast(operands.back().get())) + { + tryGetIdentifierNameInto(node, ast_with_alias->alias); + return true; + } + + return false; + } + + bool is_table_function = false; + + /// 'AND' in operator '... BETWEEN ... AND ...' mirrors logical operator 'AND'. + /// In order to distinguish them we keep a counter of BETWEENs without matching ANDs. 
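For reference, a self-contained model of the operand/operator stack discipline `mergeElement()` relies on: before pushing a weaker-binding operator the parser folds everything on the stack that binds at least as tightly, so the stack stays sorted by priority and can be folded right-to-left in one pass (a toy `+`/`*` evaluator; numbers stand in for ASTs). The `between_counter` declared just below extends the same machinery to tell `BETWEEN ... AND` apart from logical `AND`.

```cpp
#include <cassert>
#include <string>
#include <vector>

struct Op { char symbol; int priority; };  // smaller number = binds tighter

void apply(std::vector<long> & operands, Op op)
{
    long r = operands.back(); operands.pop_back();
    long l = operands.back(); operands.pop_back();
    operands.push_back(op.symbol == '+' ? l + r : l * r);
}

long eval(const std::string & expr)  // single digits, '+' and '*' only
{
    std::vector<long> operands;
    std::vector<Op> ops;
    for (char c : expr)
    {
        if (c >= '0' && c <= '9')
        {
            operands.push_back(c - '0');
            continue;
        }
        Op op{c, c == '*' ? 1 : 2};
        // Fold while the stacked operator binds at least as tightly: this keeps
        // `ops` sorted by priority, the invariant mergeElement() depends on.
        while (!ops.empty() && ops.back().priority <= op.priority)
        {
            apply(operands, ops.back());
            ops.pop_back();
        }
        ops.push_back(op);
    }
    while (!ops.empty())  // mergeElement(): fold the rest right-to-left
    {
        apply(operands, ops.back());
        ops.pop_back();
    }
    return operands.back();
}

int main()
{
    assert(eval("1+2*3+4") == 11);
    assert(eval("2*3*4+5") == 29);
}
```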
int between_counter = 0;
+
+    bool allow_alias = true;
+    bool allow_alias_without_as_keyword = true;
+
+    std::optional> saved_checkpoint;
+    Checkpoint current_checkpoint = Checkpoint::None;
+
+protected:
+    std::vector<Operator> operators;
+    ASTs operands;
+    ASTs elements;
+    bool finished = false;
+    int state = 0;
+};
+
+
+class ExpressionLayer : public Layer
+{
+public:
+
+    explicit ExpressionLayer(bool is_table_function_) : Layer(false, false)
+    {
+        is_table_function = is_table_function_;
+    }
+
+    bool getResult(ASTPtr & node) override
+    {
+        /// We can exit the main cycle outside the parse() function,
+        /// so we need to merge the element here
+        if (!mergeElement())
+            return false;
+
+        if (elements.size() == 1)
+        {
+            node = std::move(elements[0]);
+            return true;
+        }
+
+        return false;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & /*expected*/, Action & /*action*/) override
+    {
+        if (pos->type == TokenType::Comma)
+            finished = true;
+
+        return true;
+    }
+};
+
+
+/// Basic layer for a function with certain separator and end tokens:
+/// 1. If we parse a separator we should merge current operands and operators
+///    into one element and push it to the 'elements' vector.
+/// 2. If we parse an ending token, we should merge everything as in (1) and
+///    also set 'finished' flag.
+template <TokenType separator, TokenType end>
+class BaseLayer : public Layer
+{
+public:
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        if (ParserToken(separator).ignore(pos, expected))
+        {
+            action = Action::OPERAND;
+            return mergeElement();
+        }
+
+        if (ParserToken(end).ignore(pos, expected))
+        {
+            action = Action::OPERATOR;
+
+            if (!isCurrentElementEmpty() || !elements.empty())
+                if (!mergeElement())
+                    return false;
+
+            finished = true;
+        }
+
+        return true;
+    }
+};
+
+
+class OrdinaryFunctionLayer : public Layer
+{
+public:
+    explicit OrdinaryFunctionLayer(String function_name_, bool allow_function_parameters_ = true)
+        : function_name(function_name_), allow_function_parameters(allow_function_parameters_){}
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// |   0   |      1      |     2    |
+        /// f(ALL ...)(ALL ...) FILTER ...
+        ///
+        /// 0. Parse ALL and DISTINCT qualifiers (-> 1)
+        /// 1. Parse all the arguments and ending token (-> 2), possibly with parameters list (-> 1)
+        /// 2. Create function, possibly parse FILTER and OVER window definitions (finished)
+
+        if (state == 0)
+        {
+            state = 1;
+
+            auto pos_after_bracket = pos;
+            auto old_expected = expected;
+
+            ParserKeyword all("ALL");
+            ParserKeyword distinct("DISTINCT");
+
+            if (all.ignore(pos, expected))
+                has_all = true;
+
+            if (distinct.ignore(pos, expected))
+                has_distinct = true;
+
+            if (!has_all && all.ignore(pos, expected))
+                has_all = true;
+
+            if (has_all && has_distinct)
+                return false;
+
+            if (has_all || has_distinct)
+            {
+                /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers
+                if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
+                {
+                    pos = pos_after_bracket;
+                    expected = old_expected;
+                    has_all = false;
+                    has_distinct = false;
+                }
+            }
+
+            contents_begin = pos->begin;
+        }
+
+        if (state == 1)
+        {
+            if (ParserToken(TokenType::Comma).ignore(pos, expected))
+            {
+                action = Action::OPERAND;
+                return mergeElement();
+            }
+
+            if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+            {
+                action = Action::OPERATOR;
+
+                if (!isCurrentElementEmpty() || !elements.empty())
+                    if (!mergeElement())
+                        return false;
+
+                contents_end = pos->begin;
+
+                /** Check for a common error case - often due to the complexity of quoting command-line arguments,
+                  * an expression of the form toDate(2014-01-01) appears in the query instead of toDate('2014-01-01').
+                  * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number,
+                  * and the query silently returns an unexpected result.
+                  */
+                if (function_name == "toDate"
+                    && contents_end - contents_begin == strlen("2014-01-01")
+                    && contents_begin[0] >= '2' && contents_begin[0] <= '3'
+                    && contents_begin[1] >= '0' && contents_begin[1] <= '9'
+                    && contents_begin[2] >= '0' && contents_begin[2] <= '9'
+                    && contents_begin[3] >= '0' && contents_begin[3] <= '9'
+                    && contents_begin[4] == '-'
+                    && contents_begin[5] >= '0' && contents_begin[5] <= '9'
+                    && contents_begin[6] >= '0' && contents_begin[6] <= '9'
+                    && contents_begin[7] == '-'
+                    && contents_begin[8] >= '0' && contents_begin[8] <= '9'
+                    && contents_begin[9] >= '0' && contents_begin[9] <= '9')
+                {
+                    std::string contents_str(contents_begin, contents_end - contents_begin);
+                    throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')"
+                        , ErrorCodes::SYNTAX_ERROR);
+                }
+
+                if (allow_function_parameters && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
+                {
+                    parameters = std::make_shared<ASTExpressionList>();
+                    std::swap(parameters->children, elements);
+                    action = Action::OPERAND;
+
+                    /// Parametric aggregate functions cannot have DISTINCT in parameters list.
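The branch starting here is the parametric-aggregate case `quantile(0.9)(x)`: a second `(` after the argument list means the elements parsed so far were actually parameters (hence the `std::swap` above), and argument parsing starts over. A toy model of the resulting shape (hypothetical `ParsedFunction`; strings stand in for ASTs):

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct ParsedFunction
{
    std::string name;
    std::vector<std::string> parameters;  // quantile(0.9)(x) -> {"0.9"}
    std::vector<std::string> arguments;   // quantile(0.9)(x) -> {"x"}
};

ParsedFunction finishParametric(std::string name, std::vector<std::string> elements,
                                std::vector<std::string> second_list)
{
    ParsedFunction f;
    f.name = std::move(name);
    std::swap(f.parameters, elements);  // mirrors std::swap(parameters->children, elements)
    f.arguments = std::move(second_list);
    return f;
}

int main()
{
    auto f = finishParametric("quantile", {"0.9"}, {"x"});
    std::vector<std::string> expected_params{"0.9"};
    std::vector<std::string> expected_args{"x"};
    assert(f.parameters == expected_params);
    assert(f.arguments == expected_args);
}
```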
+                    if (has_distinct)
+                        return false;
+
+                    auto pos_after_bracket = pos;
+                    auto old_expected = expected;
+
+                    ParserKeyword all("ALL");
+                    ParserKeyword distinct("DISTINCT");
+
+                    if (all.ignore(pos, expected))
+                        has_all = true;
+
+                    if (distinct.ignore(pos, expected))
+                        has_distinct = true;
+
+                    if (!has_all && all.ignore(pos, expected))
+                        has_all = true;
+
+                    if (has_all && has_distinct)
+                        return false;
+
+                    if (has_all || has_distinct)
+                    {
+                        /// case f(ALL), f(ALL, x), f(DISTINCT), f(DISTINCT, x), ALL and DISTINCT should be treated as identifiers
+                        if (pos->type == TokenType::Comma || pos->type == TokenType::ClosingRoundBracket)
+                        {
+                            pos = pos_after_bracket;
+                            expected = old_expected;
+                            has_distinct = false;
+                        }
+                    }
+                }
+                else
+                {
+                    state = 2;
+                }
+            }
+        }
+
+        if (state == 2)
+        {
+            if (has_distinct)
+                function_name += "Distinct";
+
+            auto function_node = makeASTFunction(function_name, std::move(elements));
+
+            if (parameters)
+            {
+                function_node->parameters = parameters;
+                function_node->children.push_back(function_node->parameters);
+            }
+
+            ParserKeyword filter("FILTER");
+            ParserKeyword over("OVER");
+
+            if (filter.ignore(pos, expected))
+            {
+                // We are slightly breaking the parser interface by parsing the window
+                // definition into an existing ASTFunction. Normally it would take a
+                // reference to ASTPtr and assign it the new node. We only have a pointer
+                // of a different type, hence this workaround with a temporary pointer.
+                ASTPtr function_node_as_iast = function_node;
+
+                // Recursion
+                ParserFilterClause filter_parser;
+                if (!filter_parser.parse(pos, function_node_as_iast, expected))
+                    return false;
+            }
+
+            if (over.ignore(pos, expected))
+            {
+                function_node->is_window_function = true;
+
+                ASTPtr function_node_as_iast = function_node;
+
+                // Recursion
+                ParserWindowReference window_reference;
+                if (!window_reference.parse(pos, function_node_as_iast, expected))
+                    return false;
+            }
+
+            elements = {function_node};
+            finished = true;
+        }
+
+        return true;
+    }
+
+private:
+    bool has_all = false;
+    bool has_distinct = false;
+
+    const char * contents_begin;
+    const char * contents_end;
+
+    String function_name;
+    ASTPtr parameters;
+
+    bool allow_function_parameters;
+};
+
+/// Layer for priority brackets and tuple function
+class RoundBracketsLayer : public Layer
+{
+public:
+    bool getResult(ASTPtr & node) override
+    {
+        // Round brackets can mean priority operator as well as function tuple()
+        if (!is_tuple && elements.size() == 1)
+            node = std::move(elements[0]);
+        else
+            node = makeASTFunction("tuple", std::move(elements));
+
+        return true;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        if (ParserToken(TokenType::Comma).ignore(pos, expected))
+        {
+            action = Action::OPERAND;
+            is_tuple = true;
+            if (!mergeElement())
+                return false;
+        }
+
+        if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+        {
+            action = Action::OPERATOR;
+
+            if (!isCurrentElementEmpty())
+                if (!mergeElement())
+                    return false;
+
+            // Special case for (('a', 'b')) -> tuple(('a', 'b'))
+            if (!is_tuple && elements.size() == 1)
+                if (auto * literal = elements[0]->as<ASTLiteral>())
+                    if (literal->value.getType() == Field::Types::Tuple)
+                        is_tuple = true;
+
+            finished = true;
+        }
+
+        return true;
+    }
+private:
+    bool is_tuple = false;
+};
+
+/// Layer for array square brackets operator
+class ArrayLayer : public BaseLayer<TokenType::Comma, TokenType::ClosingSquareBracket>
+{
+public:
+    bool getResult(ASTPtr & node) override
+    {
+        node = makeASTFunction("array", std::move(elements));
+        return true;
+    }
+
+    bool parse(IParser::Pos
& pos, Expected & expected, Action & action) override + { + return BaseLayer::parse(pos, expected, action); + } +}; + +/// Layer for arrayElement square brackets operator +/// This layer does not create a function, it is only needed to parse closing token +/// and return only one element. +class ArrayElementLayer : public BaseLayer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + return BaseLayer::parse(pos, expected, action); + } +}; + +class CastLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// CAST(x [AS alias1], T [AS alias2]) or CAST(x [AS alias1] AS T) + /// + /// 0. Parse all the cases (-> 1) + /// 1. Parse closing token (finished) + + ParserKeyword as_keyword_parser("AS"); + ASTPtr alias; + + /// expr AS type + if (state == 0) + { + ASTPtr type_node; + + if (as_keyword_parser.ignore(pos, expected)) + { + auto old_pos = pos; + + if (ParserIdentifier().parse(pos, alias, expected) && + as_keyword_parser.ignore(pos, expected) && + ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!insertAlias(alias)) + return false; + + if (!mergeElement()) + return false; + + elements = {createFunctionCast(elements[0], type_node)}; + finished = true; + return true; + } + + pos = old_pos; + + if (ParserIdentifier().parse(pos, alias, expected) && + ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + if (!insertAlias(alias)) + return false; + + if (!mergeElement()) + return false; + + state = 1; + return true; + } + + pos = old_pos; + + if (ParserDataType().parse(pos, type_node, expected) && + ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + elements = {createFunctionCast(elements[0], type_node)}; + finished = true; + return true; + } + + return false; + } + + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + return true; + } + } + if (state == 1) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + elements = {makeASTFunction("CAST", elements[0], elements[1])}; + finished = true; + return true; + } + } + + return true; + } +}; + +class ExtractLayer : public BaseLayer +{ +public: + bool getResult(ASTPtr & node) override + { + if (state == 2) + { + if (elements.empty()) + return false; + + node = makeASTFunction(interval_kind.toNameOfFunctionExtractTimePart(), elements[0]); + } + else + { + node = makeASTFunction("extract", std::move(elements)); + } + + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// extract(haystack, pattern) or EXTRACT(DAY FROM Date) + /// + /// 0. If we parse interval_kind and 'FROM' keyword (-> 2), otherwise (-> 1) + /// 1. Basic parser + /// 2. 
Parse closing bracket (finished) + + if (state == 0) + { + IParser::Pos begin = pos; + ParserKeyword s_from("FROM"); + + if (parseIntervalKind(pos, expected, interval_kind) && s_from.ignore(pos, expected)) + { + state = 2; + return true; + } + else + { + state = 1; + pos = begin; + } + } + + if (state == 1) + { + return BaseLayer::parse(pos, expected, action); + } + + if (state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + return true; + } + } + + return true; + } + +private: + IntervalKind interval_kind; +}; + +class SubstringLayer : public Layer +{ +public: + bool getResult(ASTPtr & node) override + { + node = makeASTFunction("substring", std::move(elements)); + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// Either SUBSTRING(expr FROM start [FOR length]) or SUBSTRING(expr, start, length) + /// + /// 0: Parse first separator: FROM or comma (-> 1) + /// 1: Parse second separator: FOR or comma (-> 2) + /// 1 or 2: Parse closing bracket (finished) + + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FROM").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + } + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected) || + ParserKeyword("FOR").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 2; + } + } + + if (state == 1 || state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + + return true; + } +}; + +class PositionLayer : public Layer +{ +public: + bool getResult(ASTPtr & node) override + { + if (state == 2) + std::swap(elements[1], elements[0]); + + node = makeASTFunction("position", std::move(elements)); + return true; + } + + bool parse(IParser::Pos & pos, Expected & expected, Action & action) override + { + /// position(haystack, needle[, start_pos]) or position(needle IN haystack) + /// + /// 0: Parse separator: comma (-> 1) or IN (-> 2) + /// 1: Parse second separator: comma + /// 1 or 2: Parse closing bracket (finished) + + if (state == 0) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 1; + } + if (ParserKeyword("IN").ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + + state = 2; + } + } + + if (state == 1) + { + if (ParserToken(TokenType::Comma).ignore(pos, expected)) + { + action = Action::OPERAND; + + if (!mergeElement()) + return false; + } + } + + if (state == 1 || state == 2) + { + if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + { + if (!mergeElement()) + return false; + + finished = true; + } + } + + return true; + } +}; + + +class ExistsLayer : public Layer +{ +public: + bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override + { + ASTPtr node; + + // Recursion + if (!ParserSelectWithUnionQuery().parse(pos, node, expected)) + return false; + + if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected)) + return false; + + auto subquery = std::make_shared(); + subquery->children.push_back(node); + elements = {makeASTFunction("exists", subquery)}; + + finished = true; + + return true; + } +}; + +class 
+
+class TrimLayer : public Layer
+{
+public:
+    TrimLayer(bool trim_left_, bool trim_right_) : trim_left(trim_left_), trim_right(trim_right_)
+    {
+    }
+
+    bool getResult(ASTPtr & node) override
+    {
+        node = makeASTFunction(function_name, std::move(elements));
+        return true;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// Handles all possible TRIM/LTRIM/RTRIM call variants
+        ///
+        /// 0: If flags 'trim_left' and 'trim_right' are set (-> 2).
+        ///    If not, try to parse 'BOTH', 'LEADING', 'TRAILING' keywords,
+        ///    then if char_override (-> 1), else (-> 2)
+        /// 1. Parse 'FROM' keyword (-> 2)
+        /// 2. Parse closing token, choose name, add arguments (finished)
+
+        if (state == 0)
+        {
+            if (!trim_left && !trim_right)
+            {
+                if (ParserKeyword("BOTH").ignore(pos, expected))
+                {
+                    trim_left = true;
+                    trim_right = true;
+                    char_override = true;
+                }
+                else if (ParserKeyword("LEADING").ignore(pos, expected))
+                {
+                    trim_left = true;
+                    char_override = true;
+                }
+                else if (ParserKeyword("TRAILING").ignore(pos, expected))
+                {
+                    trim_right = true;
+                    char_override = true;
+                }
+                else
+                {
+                    trim_left = true;
+                    trim_right = true;
+                }
+
+                if (char_override)
+                    state = 1;
+                else
+                    state = 2;
+            }
+            else
+            {
+                state = 2;
+            }
+        }
+
+        if (state == 1)
+        {
+            if (ParserKeyword("FROM").ignore(pos, expected))
+            {
+                action = Action::OPERAND;
+
+                if (!mergeElement())
+                    return false;
+
+                to_remove = makeASTFunction("regexpQuoteMeta", elements[0]);
+                elements.clear();
+                state = 2;
+            }
+        }
+
+        if (state == 2)
+        {
+            if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                ASTPtr pattern_node;
+
+                if (char_override)
+                {
+                    auto pattern_func_node = std::make_shared<ASTFunction>();
+                    auto pattern_list_args = std::make_shared<ASTExpressionList>();
+                    if (trim_left && trim_right)
+                    {
+                        pattern_list_args->children = {
+                            std::make_shared<ASTLiteral>("^["),
+                            to_remove,
+                            std::make_shared<ASTLiteral>("]+|["),
+                            to_remove,
+                            std::make_shared<ASTLiteral>("]+$")
+                        };
+                        function_name = "replaceRegexpAll";
+                    }
+                    else
+                    {
+                        if (trim_left)
+                        {
+                            pattern_list_args->children = {
+                                std::make_shared<ASTLiteral>("^["),
+                                to_remove,
+                                std::make_shared<ASTLiteral>("]+")
+                            };
+                        }
+                        else
+                        {
+                            /// trim_left == false here, so trim_right must be true
+                            pattern_list_args->children = {
+                                std::make_shared<ASTLiteral>("["),
+                                to_remove,
+                                std::make_shared<ASTLiteral>("]+$")
+                            };
+                        }
+                        function_name = "replaceRegexpOne";
+                    }
+
+                    pattern_func_node->name = "concat";
+                    pattern_func_node->arguments = std::move(pattern_list_args);
+                    pattern_func_node->children.push_back(pattern_func_node->arguments);
+
+                    pattern_node = std::move(pattern_func_node);
+                }
+                else
+                {
+                    if (trim_left && trim_right)
+                    {
+                        function_name = "trimBoth";
+                    }
+                    else
+                    {
+                        if (trim_left)
+                            function_name = "trimLeft";
+                        else
+                            function_name = "trimRight";
+                    }
+                }
+
+                if (char_override)
+                {
+                    elements.push_back(pattern_node);
+                    elements.push_back(std::make_shared<ASTLiteral>(""));
+                }
+
+                finished = true;
+            }
+        }
+
+        return true;
+    }
+private:
+    bool trim_left;
+    bool trim_right;
+    bool char_override = false;
+
+    ASTPtr to_remove;
+    String function_name;
+};
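+
+/// For example (illustrative): TRIM(BOTH 'x' FROM s) becomes the AST of
+/// replaceRegexpAll(s, concat('^[', regexpQuoteMeta('x'), ']+|[', regexpQuoteMeta('x'), ']+$'), ''),
+/// while plain TRIM(s) without a character override simply becomes trimBoth(s).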
+
+class DateAddLayer : public BaseLayer<TokenType::Comma, TokenType::ClosingRoundBracket>
+{
+public:
+    explicit DateAddLayer(const char * function_name_) : function_name(function_name_)
+    {
+    }
+
+    bool getResult(ASTPtr & node) override
+    {
+        if (parsed_interval_kind)
+        {
+            elements[0] = makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements[0]);
+            node = makeASTFunction(function_name, elements[1], elements[0]);
+        }
+        else
+            node = makeASTFunction(function_name, std::move(elements));
+
+        return true;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// DATEADD(YEAR, 1, date) or DATEADD(INTERVAL 1 YEAR, date);
+        ///
+        /// 0. Try to parse interval_kind (-> 1)
+        /// 1. Basic parser
+
+        if (state == 0)
+        {
+            if (parseIntervalKind(pos, expected, interval_kind))
+            {
+                if (!ParserToken(TokenType::Comma).ignore(pos, expected))
+                    return false;
+
+                action = Action::OPERAND;
+                parsed_interval_kind = true;
+            }
+
+            state = 1;
+        }
+
+        if (state == 1)
+        {
+            return BaseLayer::parse(pos, expected, action);
+        }
+
+        return true;
+    }
+
+private:
+    IntervalKind interval_kind;
+    const char * function_name;
+    bool parsed_interval_kind = false;
+};
+
+
+class DateDiffLayer : public BaseLayer<TokenType::Comma, TokenType::ClosingRoundBracket>
+{
+public:
+    bool getResult(ASTPtr & node) override
+    {
+        if (parsed_interval_kind)
+        {
+            if (elements.size() == 2)
+                node = makeASTFunction("dateDiff", std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()), elements[0], elements[1]);
+            else if (elements.size() == 3)
+                node = makeASTFunction("dateDiff", std::make_shared<ASTLiteral>(interval_kind.toDateDiffUnit()), elements[0], elements[1], elements[2]);
+            else
+                return false;
+        }
+        else
+        {
+            node = makeASTFunction("dateDiff", std::move(elements));
+        }
+        return true;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// 0. Try to parse interval_kind (-> 1)
+        /// 1. Basic parser
+
+        if (state == 0)
+        {
+            if (parseIntervalKind(pos, expected, interval_kind))
+            {
+                parsed_interval_kind = true;
+
+                if (!ParserToken(TokenType::Comma).ignore(pos, expected))
+                    return false;
+            }
+
+            state = 1;
+        }
+
+        if (state == 1)
+        {
+            return BaseLayer::parse(pos, expected, action);
+        }
+
+        return true;
+    }
+
+private:
+    IntervalKind interval_kind;
+    bool parsed_interval_kind = false;
+};
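+
+/// For example (illustrative): DATEADD(MINUTE, 5, d) parses MINUTE up front, so
+/// getResult() builds plus(d, toIntervalMinute(5)); likewise DATEDIFF(MINUTE, a, b)
+/// becomes dateDiff('minute', a, b).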
+
+class IntervalLayer : public Layer
+{
+public:
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// INTERVAL 1 HOUR or INTERVAL expr HOUR
+        ///
+        /// 0. Try to parse interval_kind (-> 1)
+        /// 1. Basic parser
+
+        if (state == 0)
+        {
+            auto begin = pos;
+            auto init_expected = expected;
+            ASTPtr string_literal;
+            /// A string literal can follow the INTERVAL keyword:
+            /// the literal is either a part of an expression, or
+            /// contains both the number and the interval type at the same time
+            if (ParserStringLiteral{}.parse(pos, string_literal, expected))
+            {
+                String literal;
+                if (string_literal->as<ASTLiteral &>().value.tryGet<String>(literal))
+                {
+                    Tokens tokens(literal.data(), literal.data() + literal.size());
+                    IParser::Pos token_pos(tokens, 0);
+                    Expected token_expected;
+                    ASTPtr expr;
+
+                    if (!ParserNumber{}.parse(token_pos, expr, token_expected))
+                    {
+                        return false;
+                    }
+                    else
+                    {
+                        /// case: INTERVAL '1' HOUR
+                        /// back to begin
+                        if (!token_pos.isValid())
+                        {
+                            pos = begin;
+                            expected = init_expected;
+                        }
+                        else
+                        {
+                            /// case: INTERVAL '1 HOUR'
+                            if (!parseIntervalKind(token_pos, token_expected, interval_kind))
+                                return false;
+
+                            elements = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), expr)};
+                            finished = true;
+                            return true;
+                        }
+                    }
+                }
+            }
+            state = 1;
+            return true;
+        }
+
+        if (state == 1)
+        {
+            if (action == Action::OPERATOR && parseIntervalKind(pos, expected, interval_kind))
+            {
+                if (!mergeElement())
+                    return false;
+
+                elements = {makeASTFunction(interval_kind.toNameOfFunctionToIntervalDataType(), elements)};
+                finished = true;
+            }
+        }
+
+        return true;
+    }
+
+private:
+    IntervalKind interval_kind;
+};
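+
+/// For example (illustrative): INTERVAL '1 HOUR' re-tokenizes the string literal,
+/// finds both the number and the interval kind inside it, and yields toIntervalHour(1)
+/// at once; INTERVAL '1' HOUR and INTERVAL 1 HOUR fall through to state 1, where the
+/// HOUR keyword in operator position wraps the already-parsed operand.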
+
+/// Layer for table function 'view' and 'viewIfPermitted'
+class ViewLayer : public Layer
+{
+public:
+    explicit ViewLayer(bool if_permitted_) : if_permitted(if_permitted_) {}
+
+    bool getResult(ASTPtr & node) override
+    {
+        if (if_permitted)
+            node = makeASTFunction("viewIfPermitted", std::move(elements));
+        else
+            node = makeASTFunction("view", std::move(elements));
+
+        return true;
+    }
+
+    bool parse(IParser::Pos & pos, Expected & expected, Action & /*action*/) override
+    {
+        /// view(SELECT ...)
+        /// viewIfPermitted(SELECT ... ELSE func(...))
+        ///
+        /// 0. Parse the SELECT query and if 'if_permitted' parse 'ELSE' keyword (-> 1) else (finished)
+        /// 1. Parse closing token
+
+        if (state == 0)
+        {
+            ASTPtr query;
+
+            bool maybe_an_subquery = pos->type == TokenType::OpeningRoundBracket;
+
+            if (!ParserSelectWithUnionQuery().parse(pos, query, expected))
+                return false;
+
+            auto & select_ast = query->as<ASTSelectWithUnionQuery &>();
+            if (select_ast.list_of_selects->children.size() == 1 && maybe_an_subquery)
+            {
+                // It's a subquery. Bail out.
+                return false;
+            }
+
+            pushResult(query);
+
+            if (!if_permitted)
+            {
+                if (!ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+                    return false;
+
+                finished = true;
+                return true;
+            }
+
+            if (!ParserKeyword{"ELSE"}.ignore(pos, expected))
+                return false;
+
+            state = 1;
+            return true;
+        }
+
+        if (state == 1)
+        {
+            if (ParserToken(TokenType::ClosingRoundBracket).ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                finished = true;
+            }
+        }
+
+        return true;
+    }
+
+private:
+    bool if_permitted;
+};
+
+
+class CaseLayer : public Layer
+{
+public:
+    bool parse(IParser::Pos & pos, Expected & expected, Action & action) override
+    {
+        /// CASE [x] WHEN expr THEN expr [WHEN expr THEN expr [...]] [ELSE expr] END
+        ///
+        /// 0. Check if we have case expression [x] (-> 1)
+        /// 1. Parse keywords: WHEN (-> 2), ELSE (-> 3), END (finished)
+        /// 2. Parse THEN keyword (-> 1)
+        /// 3. Parse END keyword (finished)
+
+        if (state == 0)
+        {
+            auto old_pos = pos;
+            has_case_expr = !ParserKeyword("WHEN").ignore(pos, expected);
+            pos = old_pos;
+
+            state = 1;
+        }
+
+        if (state == 1)
+        {
+            if (ParserKeyword("WHEN").ignore(pos, expected))
+            {
+                if ((has_case_expr || !elements.empty()) && !mergeElement())
+                    return false;
+
+                action = Action::OPERAND;
+                state = 2;
+            }
+            else if (ParserKeyword("ELSE").ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                action = Action::OPERAND;
+                state = 3;
+            }
+            else if (ParserKeyword("END").ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                Field field_with_null;
+                ASTLiteral null_literal(field_with_null);
+                elements.push_back(std::make_shared<ASTLiteral>(null_literal));
+
+                if (has_case_expr)
+                    elements = {makeASTFunction("caseWithExpression", elements)};
+                else
+                    elements = {makeASTFunction("multiIf", elements)};
+                finished = true;
+            }
+        }
+
+        if (state == 2)
+        {
+            if (ParserKeyword("THEN").ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                action = Action::OPERAND;
+                state = 1;
+            }
+        }
+
+        if (state == 3)
+        {
+            if (ParserKeyword("END").ignore(pos, expected))
+            {
+                if (!mergeElement())
+                    return false;
+
+                if (has_case_expr)
+                    elements = {makeASTFunction("caseWithExpression", elements)};
+                else
+                    elements = {makeASTFunction("multiIf", elements)};
+
+                finished = true;
+            }
+        }
+
+        return true;
+    }
+
+private:
+    bool has_case_expr;
+};
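+
+/// For example (illustrative): CASE x WHEN 1 THEN 'a' END collects {x, 1, 'a'} plus an
+/// implicit NULL for the missing ELSE branch and becomes caseWithExpression(x, 1, 'a', NULL);
+/// without the leading case expression the same chain produces multiIf(...).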
+
+
+std::unique_ptr<Layer> getFunctionLayer(ASTPtr identifier, bool is_table_function, bool allow_function_parameters_ = true)
+{
+    /// Special cases for expressions that look like functions but contain some syntax sugar:
+
+    /// CAST, EXTRACT, POSITION, EXISTS
+    /// DATE_ADD, DATEADD, TIMESTAMPADD, DATE_SUB, DATESUB, TIMESTAMPSUB,
+    /// DATE_DIFF, DATEDIFF, TIMESTAMPDIFF, TIMESTAMP_DIFF,
+    /// SUBSTRING, TRIM, LTRIM, RTRIM, POSITION
+
+    /// Can be parsed as a composition of functions, but the contents must be unwrapped:
+    /// POSITION(x IN y) -> POSITION(in(x, y)) -> POSITION(y, x)
+
+    /// Can be parsed as a function, but not always:
+    /// CAST(x AS type) - alias has to be unwrapped
+    /// CAST(x AS type(params))
+
+    /// Can be parsed as a function, but some identifier arguments have special meanings:
+    /// DATE_ADD(MINUTE, x, y) -> addMinutes(x, y)
+    /// DATE_DIFF(MINUTE, x, y)
+
+    /// Have keywords that have to be processed explicitly:
+    /// EXTRACT(x FROM y)
+    /// TRIM(BOTH|LEADING|TRAILING x FROM y)
+    /// SUBSTRING(x FROM a)
+    /// SUBSTRING(x FROM a FOR b)
+
+    String function_name = getIdentifierName(identifier);
+    String function_name_lowercase = Poco::toLower(function_name);
+
+    if (is_table_function)
+    {
+        if (function_name_lowercase == "view")
+            return std::make_unique<ViewLayer>(false);
+        else if (function_name_lowercase == "viewifpermitted")
+            return std::make_unique<ViewLayer>(true);
+    }
+
+    if (function_name_lowercase == "cast")
+        return std::make_unique<CastLayer>();
+    else if (function_name_lowercase == "extract")
+        return std::make_unique<ExtractLayer>();
+    else if (function_name_lowercase == "substring")
+        return std::make_unique<SubstringLayer>();
+    else if (function_name_lowercase == "position")
+        return std::make_unique<PositionLayer>();
+    else if (function_name_lowercase == "exists")
+        return std::make_unique<ExistsLayer>();
+    else if (function_name_lowercase == "trim")
+        return std::make_unique<TrimLayer>(false, false);
+    else if (function_name_lowercase == "ltrim")
+        return std::make_unique<TrimLayer>(true, false);
+    else if (function_name_lowercase == "rtrim")
+        return std::make_unique<TrimLayer>(false, true);
+    else if (function_name_lowercase == "dateadd" || function_name_lowercase == "date_add"
+        || function_name_lowercase == "timestampadd" || function_name_lowercase == "timestamp_add")
+        return std::make_unique<DateAddLayer>("plus");
+    else if (function_name_lowercase == "datesub" || function_name_lowercase == "date_sub"
+        || function_name_lowercase == "timestampsub" || function_name_lowercase == "timestamp_sub")
+        return std::make_unique<DateAddLayer>("minus");
+    else if (function_name_lowercase == "datediff" || function_name_lowercase == "date_diff"
+        || function_name_lowercase == "timestampdiff" || function_name_lowercase == "timestamp_diff")
+        return std::make_unique<DateDiffLayer>();
+    else if (function_name_lowercase == "grouping")
+        return std::make_unique<FunctionLayer>(function_name_lowercase, allow_function_parameters_);
+    else
+        return std::make_unique<FunctionLayer>(function_name, allow_function_parameters_);
+}
+
+
+bool ParseCastExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected)
+{
+    IParser::Pos begin = pos;
+
+    if (ParserCastOperator().parse(pos, node, expected))
+        return true;
+
+    pos = begin;
+
+    /// As an exception, negative numbers should be parsed as literals, and not as an application of the operator.
+    if (pos->type == TokenType::Minus)
+    {
+        if (ParserLiteral().parse(pos, node, expected))
+            return true;
+    }
+    return false;
+}
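+
+/// For example (illustrative): for -1::UInt8 the leading minus is read as part of the
+/// literal here, so the cast is applied to -1 instead of negate() being applied to the
+/// result of the cast.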
+
+bool ParseDateOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto begin = pos;
+
+    /// If no DATE keyword, go to the nested parser.
+    if (!ParserKeyword("DATE").ignore(pos, expected))
+        return false;
+
+    ASTPtr expr;
+    if (!ParserStringLiteral().parse(pos, expr, expected))
+    {
+        pos = begin;
+        return false;
+    }
+
+    node = makeASTFunction("toDate", expr);
+    return true;
+}
+
+bool ParseTimestampOperatorExpression(IParser::Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto begin = pos;
+
+    /// If no TIMESTAMP keyword, go to the nested parser.
+    if (!ParserKeyword("TIMESTAMP").ignore(pos, expected))
+        return false;
+
+    ASTPtr expr;
+    if (!ParserStringLiteral().parse(pos, expr, expected))
+    {
+        pos = begin;
+        return false;
+    }
+
+    node = makeASTFunction("toDateTime", expr);
+
+    return true;
+}
+
+struct ParserExpressionImpl
+{
+    static std::vector<std::pair<const char *, Operator>> operators_table;
+    static std::vector<std::pair<const char *, Operator>> unary_operators_table;
+    static const char * overlapping_operators_to_skip[];
+
+    static Operator finish_between_operator;
+
+    ParserCompoundIdentifier identifier_parser{false, true};
+    ParserNumber number_parser;
+    ParserAsterisk asterisk_parser;
+    ParserLiteral literal_parser;
+    ParserTupleOfLiterals tuple_literal_parser;
+    ParserArrayOfLiterals array_literal_parser;
+    ParserSubstitution substitution_parser;
+    ParserMySQLGlobalVariable mysql_global_variable_parser;
+
+    ParserKeyword any_parser{"ANY"};
+    ParserKeyword all_parser{"ALL"};
+
+    // Recursion
+    ParserQualifiedAsterisk qualified_asterisk_parser;
+    ParserColumnsMatcher columns_matcher_parser;
+    ParserSubquery subquery_parser;
+
+    bool parse(std::unique_ptr<Layer> start, IParser::Pos & pos, ASTPtr & node, Expected & expected);
+
+    using Layers = std::vector<std::unique_ptr<Layer>>;
+
+    Action tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected);
+    static Action tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected);
+};
+
+
+bool ParserExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto start = std::make_unique<ExpressionLayer>(false);
+    return ParserExpressionImpl().parse(std::move(start), pos, node, expected);
+}
+
+bool ParserTableFunctionExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto start = std::make_unique<ExpressionLayer>(true);
+    return ParserExpressionImpl().parse(std::move(start), pos, node, expected);
+}
+
+bool ParserArray::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    auto start = std::make_unique<ArrayLayer>();
+    return ParserToken(TokenType::OpeningSquareBracket).ignore(pos, expected)
+        && ParserExpressionImpl().parse(std::move(start), pos, node, expected);
+}
+
+bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr identifier;
+
+    if (ParserIdentifier(true).parse(pos, identifier, expected)
+        && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
+    {
+        auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters);
+        start->is_table_function = is_table_function;
+        return ParserExpressionImpl().parse(std::move(start), pos, node, expected);
+    }
+    else
+    {
+        return false;
+    }
+}
+
+std::vector<std::pair<const char *, Operator>> ParserExpressionImpl::operators_table({
+        {"->", Operator("lambda", 1, 2, OperatorType::Lambda)},
+        {"?", Operator("", 2, 0, OperatorType::StartIf)},
+        {":", Operator("if", 3, 3, OperatorType::FinishIf)},
+        {"OR", Operator("or", 3, 2, OperatorType::Mergeable)},
+        {"AND", Operator("and", 4, 2, OperatorType::Mergeable)},
+        {"BETWEEN", Operator("", 6, 0, OperatorType::StartBetween)},
+        {"NOT BETWEEN", Operator("", 6, 0, OperatorType::StartNotBetween)},
+        {"IS NULL", Operator("isNull", 8, 1, OperatorType::IsNull)},
+        {"IS NOT NULL", Operator("isNotNull", 8, 1, OperatorType::IsNull)},
+        {"==", Operator("equals", 9, 2, OperatorType::Comparison)},
+        {"!=", Operator("notEquals", 9, 2, OperatorType::Comparison)},
+        {"<>", Operator("notEquals", 9, 2, OperatorType::Comparison)},
+        {"<=", Operator("lessOrEquals", 9, 2, OperatorType::Comparison)},
+        {">=", Operator("greaterOrEquals", 9, 2, OperatorType::Comparison)},
+        {"<", Operator("less", 9, 2, OperatorType::Comparison)},
+        {">", Operator("greater", 9, 2, OperatorType::Comparison)},
+        {"=", Operator("equals", 9, 2, OperatorType::Comparison)},
+        {"LIKE", Operator("like", 9)},
+        {"ILIKE", Operator("ilike", 9)},
+        {"NOT LIKE", Operator("notLike", 9)},
+        {"NOT ILIKE", Operator("notILike", 9)},
+        {"IN", Operator("in", 9)},
+        {"NOT IN", Operator("notIn", 9)},
+        {"GLOBAL IN", Operator("globalIn", 9)},
+        {"GLOBAL NOT IN", Operator("globalNotIn", 9)},
+        {"||", Operator("concat", 10, 2, OperatorType::Mergeable)},
+        {"+", Operator("plus", 11)},
+        {"-", Operator("minus", 11)},
+        {"*", Operator("multiply", 12)},
+        {"/", Operator("divide", 12)},
+        {"%", Operator("modulo", 12)},
+        {"MOD", Operator("modulo", 12)},
+        {"DIV", Operator("intDiv", 12)},
+        {".", Operator("tupleElement", 14, 2, OperatorType::TupleElement)},
+        {"[", Operator("arrayElement", 14, 2, OperatorType::ArrayElement)},
+        {"::", Operator("CAST", 14, 2, OperatorType::Cast)},
+    });
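+
+/// For example (illustrative): in 1 + 2 * 3 the '*' entry (priority 12) outranks '+'
+/// (priority 11), so the operator loop folds multiply(2, 3) into a single operand
+/// before plus(...) is completed.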
+
+std::vector<std::pair<const char *, Operator>> ParserExpressionImpl::unary_operators_table({
+        {"NOT", Operator("not", 5, 1)},
+        {"-", Operator("negate", 13, 1)}
+    });
+
+Operator ParserExpressionImpl::finish_between_operator = Operator("", 7, 0, OperatorType::FinishBetween);
+
+const char * ParserExpressionImpl::overlapping_operators_to_skip[] =
+{
+    "IN PARTITION",
+    nullptr
+};
+
+bool ParserExpressionImpl::parse(std::unique_ptr<Layer> start, IParser::Pos & pos, ASTPtr & node, Expected & expected)
+{
+    Action next = Action::OPERAND;
+
+    Layers layers;
+    layers.push_back(std::move(start));
+
+    while (true)
+    {
+        while (pos.isValid())
+        {
+            if (!layers.back()->parse(pos, expected, next))
+                break;
+
+            if (layers.back()->isFinished())
+            {
+                if (layers.size() == 1)
+                    break;
+
+                next = Action::OPERATOR;
+
+                ASTPtr res;
+                if (!layers.back()->getResult(res))
+                    break;
+
+                layers.pop_back();
+                layers.back()->pushOperand(res);
+                continue;
+            }
+
+            if (next == Action::OPERAND)
+                next = tryParseOperand(layers, pos, expected);
+            else
+                next = tryParseOperator(layers, pos, expected);
+
+            if (next == Action::NONE)
+                break;
+        }
+
+        /// When we exit the loop we should be on the 1st level
+        if (layers.size() == 1 && layers.back()->getResult(node))
+            return true;
+
+        layers.pop_back();
+
+        /// We try to check whether there was a checkpoint
+        while (!layers.empty() && !layers.back()->saved_checkpoint)
+            layers.pop_back();
+
+        if (layers.empty())
+            return false;
+
+        /// Currently all checkpoints are located in operand section
+        next = Action::OPERAND;
+
+        auto saved_checkpoint = layers.back()->saved_checkpoint.value();
+        layers.back()->saved_checkpoint.reset();
+
+        pos = saved_checkpoint.first;
+        layers.back()->current_checkpoint = saved_checkpoint.second;
+    }
+}
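+
+/// For example (illustrative): the keywords INTERVAL and CASE set a checkpoint before
+/// their dedicated layer is pushed; if that layer later fails, the loop above rewinds
+/// pos to the checkpoint and retries the same tokens with that interpretation disabled.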
+
+Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos, Expected & expected)
+{
+    ASTPtr tmp;
+
+    if (layers.front()->is_table_function)
+    {
+        if (typeid_cast<ViewLayer *>(layers.back().get()))
+        {
+            if (identifier_parser.parse(pos, tmp, expected)
+                && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected))
+            {
+                layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function));
+                return Action::OPERAND;
+            }
+            return Action::NONE;
+        }
+
+        /// Current element should be empty (there should be no other operands or operators)
+        /// to parse SETTINGS in table function
+        if (layers.back()->isCurrentElementEmpty())
+        {
+            auto old_pos = pos;
+            ParserKeyword s_settings("SETTINGS");
+            if (s_settings.ignore(pos, expected))
+            {
+                ParserSetQuery parser_settings(true);
+                if (parser_settings.parse(pos, tmp, expected))
+                {
+                    layers.back()->pushOperand(tmp);
+                    return Action::OPERAND;
+                }
+                else
+                {
+                    pos = old_pos;
+                }
+            }
+        }
+    }
+
+    /// Special case for cast expression
+    if (layers.back()->previousType() != OperatorType::TupleElement &&
+        ParseCastExpression(pos, tmp, expected))
+    {
+        layers.back()->pushOperand(std::move(tmp));
+        return Action::OPERATOR;
+    }
+
+    if (layers.back()->previousType() == OperatorType::Comparison)
+    {
+        SubqueryFunctionType subquery_function_type = SubqueryFunctionType::NONE;
+
+        if (any_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected))
+            subquery_function_type = SubqueryFunctionType::ANY;
+        else if (all_parser.ignore(pos, expected) && subquery_parser.parse(pos, tmp, expected))
+            subquery_function_type = SubqueryFunctionType::ALL;
+
+        if (subquery_function_type != SubqueryFunctionType::NONE)
+        {
+            Operator prev_op;
+            ASTPtr function, argument;
+
+            if (!layers.back()->popOperator(prev_op))
+                return Action::NONE;
+            if (!layers.back()->popOperand(argument))
+                return Action::NONE;
+
+            function = makeASTFunction(prev_op.function_name, argument, tmp);
+
+            if (!modifyAST(function, subquery_function_type))
+                return Action::NONE;
+
+            layers.back()->pushOperand(std::move(function));
+            return Action::OPERATOR;
+        }
+    }
+
+    /// Try to find any unary operators
+    auto cur_op = unary_operators_table.begin();
+    for (; cur_op != unary_operators_table.end(); ++cur_op)
+    {
+        if (parseOperator(pos, cur_op->first, expected))
+            break;
+    }
+
+    if (cur_op != unary_operators_table.end())
+    {
+        layers.back()->pushOperator(cur_op->second);
+        return Action::OPERAND;
+    }
+
+    auto old_pos = pos;
+    auto current_checkpoint = layers.back()->current_checkpoint;
+    layers.back()->current_checkpoint = Checkpoint::None;
+
+    if (current_checkpoint != Checkpoint::Interval && parseOperator(pos, "INTERVAL", expected))
+    {
+        layers.back()->saved_checkpoint = {old_pos, Checkpoint::Interval};
+        layers.push_back(std::make_unique<IntervalLayer>());
+        return Action::OPERAND;
+    }
+    else if (current_checkpoint != Checkpoint::Case && parseOperator(pos, "CASE", expected))
+    {
+        layers.back()->saved_checkpoint = {old_pos, Checkpoint::Case};
+        layers.push_back(std::make_unique<CaseLayer>());
+        return Action::OPERAND;
+    }
+
+    if (ParseDateOperatorExpression(pos, tmp, expected) ||
+        ParseTimestampOperatorExpression(pos, tmp, expected) ||
+        tuple_literal_parser.parse(pos, tmp, expected) ||
+        array_literal_parser.parse(pos, tmp, expected) ||
+        number_parser.parse(pos, tmp, expected) ||
+        literal_parser.parse(pos, tmp, expected) ||
+        asterisk_parser.parse(pos, tmp, expected) ||
+        qualified_asterisk_parser.parse(pos, tmp, expected) ||
+        columns_matcher_parser.parse(pos, tmp, expected))
+    {
+        layers.back()->pushOperand(std::move(tmp));
+    }
+    else if (identifier_parser.parse(pos, tmp, expected))
+    {
+        if (pos->type == TokenType::OpeningRoundBracket)
+        {
+            ++pos;
+            layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function));
+            return Action::OPERAND;
+        }
+        else
+        {
+            layers.back()->pushOperand(std::move(tmp));
+        }
+    }
+    else if (substitution_parser.parse(pos, tmp, expected))
+    {
+        layers.back()->pushOperand(std::move(tmp));
+    }
+    else if (pos->type == TokenType::OpeningRoundBracket)
+    {
+        if (subquery_parser.parse(pos, tmp, expected))
+        {
+            layers.back()->pushOperand(std::move(tmp));
+            return Action::OPERATOR;
+        }
+
+        ++pos;
+        layers.push_back(std::make_unique<RoundBracketsLayer>());
+        return Action::OPERAND;
+    }
+    else if (pos->type == TokenType::OpeningSquareBracket)
+    {
+        ++pos;
+        layers.push_back(std::make_unique<ArrayLayer>());
+        return Action::OPERAND;
+    }
+    else if (mysql_global_variable_parser.parse(pos, tmp, expected))
+    {
+        layers.back()->pushOperand(std::move(tmp));
+    }
+    else
+    {
+        return Action::NONE;
+    }
+
+    return Action::OPERATOR;
+}
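+
+/// For example (illustrative): in a table function call such as
+/// remote(..., SETTINGS max_threads = 1) the SETTINGS clause is accepted only while the
+/// current element is still empty, so it cannot appear in the middle of an expression.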
+
+Action ParserExpressionImpl::tryParseOperator(Layers & layers, IParser::Pos & pos, Expected & expected)
+{
+    ASTPtr tmp;
+
+    /// ParserExpression can be called in this part of the query:
+    /// ALTER TABLE partition_all2 CLEAR INDEX [ p ] IN PARTITION ALL
+    ///
+    /// 'IN PARTITION' here is not an 'IN' operator, so we should stop parsing immediately
+    Expected stub;
+    for (const char ** it = overlapping_operators_to_skip; *it; ++it)
+        if (ParserKeyword{*it}.checkWithoutMoving(pos, stub))
+            return Action::NONE;
+
+    /// Try to find operators from 'operators_table'
+    auto cur_op = operators_table.begin();
+    for (; cur_op != operators_table.end(); ++cur_op)
+    {
+        if (parseOperator(pos, cur_op->first, expected))
+            break;
+    }
+
+    if (cur_op == operators_table.end())
+    {
+        auto old_pos = pos;
+        if (layers.back()->allow_alias && ParserAlias(layers.back()->allow_alias_without_as_keyword).parse(pos, tmp, expected))
+        {
+            if (layers.back()->insertAlias(tmp))
+                return Action::OPERATOR;
+        }
+        pos = old_pos;
+        return Action::NONE;
+    }
+
+    auto op = cur_op->second;
+
+    if (op.type == OperatorType::Lambda)
+    {
+        if (!layers.back()->parseLambda())
+            return Action::NONE;
+
+        layers.back()->pushOperator(op);
+        return Action::OPERAND;
+    }
+
+    /// 'AND' can be both boolean function and part of the '... BETWEEN ... AND ...' operator
+    if (op.function_name == "and" && layers.back()->between_counter)
+    {
+        layers.back()->between_counter--;
+        op = finish_between_operator;
+    }
+
+    while (layers.back()->previousPriority() >= op.priority)
+    {
+        ASTPtr function;
+        Operator prev_op;
+        layers.back()->popOperator(prev_op);
+
+        /// Mergeable operators are operators that are merged into one function:
+        /// For example: 'a OR b OR c' -> 'or(a, b, c)' and not 'or(or(a, b), c)'
+        if (prev_op.type == OperatorType::Mergeable && op.function_name == prev_op.function_name)
+        {
+            op.arity += prev_op.arity - 1;
+            break;
+        }
+
+        if (prev_op.type == OperatorType::FinishBetween)
+        {
+            Operator tmp_op;
+            if (!layers.back()->popOperator(tmp_op))
+                return Action::NONE;
+
+            if (tmp_op.type != OperatorType::StartBetween && tmp_op.type != OperatorType::StartNotBetween)
+                return Action::NONE;
+
+            bool negative = tmp_op.type == OperatorType::StartNotBetween;
+
+            ASTs arguments;
+            if (!layers.back()->popLastNOperands(arguments, 3))
+                return Action::NONE;
+
+            function = makeBetweenOperator(negative, arguments);
+        }
+        else
+        {
+            function = makeASTFunction(prev_op.function_name);
+
+            if (!layers.back()->popLastNOperands(function->children[0]->children, prev_op.arity))
+                return Action::NONE;
+        }
+
+        layers.back()->pushOperand(function);
+    }
+
+    layers.back()->pushOperator(op);
+
+    if (op.type == OperatorType::ArrayElement)
+        layers.push_back(std::make_unique<ArrayElementLayer>());
+
+    Action next = Action::OPERAND;
+
+    /// isNull & isNotNull are postfix unary operators
+    if (op.type == OperatorType::IsNull)
+        next = Action::OPERATOR;
+
+    if (op.type == OperatorType::StartBetween || op.type == OperatorType::StartNotBetween)
+        layers.back()->between_counter++;
+
+    if (op.type == OperatorType::Cast)
+    {
+        next = Action::OPERATOR;
+
+        ASTPtr type_ast;
+        if (!ParserDataType().parse(pos, type_ast, expected))
+            return Action::NONE;
+
+        layers.back()->pushOperand(std::make_shared<ASTLiteral>(queryToString(type_ast)));
+    }
+
+    return next;
+}
+
 }
diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h
index 05c7ec946ee..653654e5a33 100644
--- a/src/Parsers/ExpressionListParsers.h
+++ b/src/Parsers/ExpressionListParsers.h
@@ -116,6 +116,36 @@ private:
     SelectUnionModes union_modes;
 };
 
+
+class ParserArray : public IParserBase
+{
+protected:
+    const char * getName() const override { return "array"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+/** A function, for example, f(x, y + 1, g(z)).
+  * Or an aggregate function: sum(x + f(y)), corr(x, y). The syntax is the same as the usual function.
+  * Or a parametric aggregate function: quantile(0.9)(x + y).
+  * Syntax - two pairs of parentheses instead of one. The first is for parameters, the second for arguments.
+  * For functions, the DISTINCT modifier can be specified, for example, count(DISTINCT x, y).
+  */
+class ParserFunction : public IParserBase
+{
+public:
+    explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false)
+        : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_)
+    {
+    }
+
+protected:
+    const char * getName() const override { return "function"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+    bool allow_function_parameters;
+    bool is_table_function;
+};
+
+
 /** An expression with an infix binary left-associative operator.
   * For example, a + b - c + d.
   */
@@ -123,31 +153,13 @@ class ParserLeftAssociativeBinaryOperatorList : public IParserBase
 {
 private:
     Operators_t operators;
-    Operators_t overlapping_operators_to_skip = { (const char *[]){ nullptr } };
-    ParserPtr first_elem_parser;
-    ParserPtr remaining_elem_parser;
-    /// =, !=, <, > ALL (subquery) / ANY (subquery)
-    bool comparison_expression = false;
+    ParserPtr elem_parser;
 
 public:
     /** `operators_` - allowed operators and their corresponding functions
       */
-    ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_)
-        : operators(operators_), first_elem_parser(std::move(first_elem_parser_))
-    {
-    }
-
-    ParserLeftAssociativeBinaryOperatorList(Operators_t operators_,
-        Operators_t overlapping_operators_to_skip_, ParserPtr && first_elem_parser_, bool comparison_expression_ = false)
-        : operators(operators_), overlapping_operators_to_skip(overlapping_operators_to_skip_),
-        first_elem_parser(std::move(first_elem_parser_)), comparison_expression(comparison_expression_)
-    {
-    }
-
-    ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && first_elem_parser_,
-        ParserPtr && remaining_elem_parser_)
-        : operators(operators_), first_elem_parser(std::move(first_elem_parser_)),
-        remaining_elem_parser(std::move(remaining_elem_parser_))
+    ParserLeftAssociativeBinaryOperatorList(Operators_t operators_, ParserPtr && elem_parser_)
+        : operators(operators_), elem_parser(std::move(elem_parser_))
     {
     }
 
@@ -158,295 +170,8 @@ protected:
 };
 
-/** Expression with an infix operator of arbitrary arity.
-  * For example, a AND b AND c AND d.
-  */
-class ParserVariableArityOperatorList : public IParserBase
+class ParserExpression : public IParserBase
 {
-private:
-    const char * infix;
-    const char * function_name;
-    ParserPtr elem_parser;
-
-public:
-    ParserVariableArityOperatorList(const char * infix_, const char * function_, ParserPtr && elem_parser_)
-        : infix(infix_), function_name(function_), elem_parser(std::move(elem_parser_))
-    {
-    }
-
-protected:
-    const char * getName() const override { return "list, delimited by operator of variable arity"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-/** An expression with a prefix unary operator.
-  * Example, NOT x.
-  */
-class ParserPrefixUnaryOperatorExpression : public IParserBase
-{
-private:
-    Operators_t operators;
-    ParserPtr elem_parser;
-
-public:
-    /** `operators_` - allowed operators and their corresponding functions
-      */
-    ParserPrefixUnaryOperatorExpression(Operators_t operators_, ParserPtr && elem_parser_)
-        : operators(operators_), elem_parser(std::move(elem_parser_))
-    {
-    }
-
-protected:
-    const char * getName() const override { return "expression with prefix unary operator"; }
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-/// CAST operator "::". This parser is used if left argument
-/// of operator cannot be read as simple literal from text of query.
-/// Example: "[1, 1 + 1, 1 + 2]::Array(UInt8)"
-class ParserCastExpression : public IParserBase
-{
-private:
-    ParserPtr elem_parser;
-
-public:
-    explicit ParserCastExpression(ParserPtr && elem_parser_)
-        : elem_parser(std::move(elem_parser_))
-    {
-    }
-
-protected:
-    const char * getName() const override { return "CAST expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserArrayElementExpression : public IParserBase
-{
-private:
-    static const char * operators[];
-
-protected:
-    const char * getName() const override { return "array element expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserTupleElementExpression : public IParserBase
-{
-private:
-    static const char * operators[];
-
-protected:
-    const char * getName() const override { return "tuple element expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserUnaryExpression : public IParserBase
-{
-private:
-    static const char * operators[];
-    ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserCastExpression>(std::make_unique<ParserTupleElementExpression>())};
-
-protected:
-    const char * getName() const override { return "unary expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserMultiplicativeExpression : public IParserBase
-{
-private:
-    static const char * operators[];
-    ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique<ParserUnaryExpression>()};
-
-protected:
-    const char * getName() const override { return "multiplicative expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
-    {
-        return operator_parser.parse(pos, node, expected);
-    }
-};
-
-/// DATE operator. "DATE '2001-01-01'" would be parsed as "toDate('2001-01-01')".
-class ParserDateOperatorExpression : public IParserBase
-{
-protected:
-    ParserMultiplicativeExpression next_parser;
-
-    const char * getName() const override { return "DATE operator expression"; }
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-/// TIMESTAMP operator.
"TIMESTAMP '2001-01-01 12:34:56'" would be parsed as "toDateTime('2001-01-01 12:34:56')". -class ParserTimestampOperatorExpression : public IParserBase -{ -protected: - ParserDateOperatorExpression next_parser; - - const char * getName() const override { return "TIMESTAMP operator expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - -/// Optional conversion to INTERVAL data type. Example: "INTERVAL x SECOND" parsed as "toIntervalSecond(x)". -class ParserIntervalOperatorExpression : public IParserBase -{ -protected: - ParserTimestampOperatorExpression next_parser; - - const char * getName() const override { return "INTERVAL operator expression"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - -private: - static bool parseArgumentAndIntervalKind(Pos & pos, ASTPtr & expr, IntervalKind & interval_kind, Expected & expected); -}; - -class ParserAdditiveExpression : public IParserBase -{ -private: - static const char * operators[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, std::make_unique()}; - -protected: - const char * getName() const override { return "additive expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - - -class ParserConcatExpression : public IParserBase -{ - ParserVariableArityOperatorList operator_parser {"||", "concat", std::make_unique()}; - -protected: - const char * getName() const override { return "string concatenation expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - - -class ParserBetweenExpression : public IParserBase -{ -private: - ParserConcatExpression elem_parser; - -protected: - const char * getName() const override { return "BETWEEN expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; -}; - - -class ParserComparisonExpression : public IParserBase -{ -private: - static const char * operators[]; - static const char * overlapping_operators_to_skip[]; - ParserLeftAssociativeBinaryOperatorList operator_parser {operators, - overlapping_operators_to_skip, std::make_unique(), true}; - -protected: - const char * getName() const override{ return "comparison expression"; } - - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - return operator_parser.parse(pos, node, expected); - } -}; - -/** Parser for nullity checking with IS (NOT) NULL. 
-  */
-class ParserNullityChecking : public IParserBase
-{
-private:
-    ParserComparisonExpression elem_parser;
-
-protected:
-    const char * getName() const override { return "nullity checking"; }
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserLogicalNotExpression : public IParserBase
-{
-private:
-    static const char * operators[];
-    ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserNullityChecking>()};
-
-protected:
-    const char * getName() const override { return "logical-NOT expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
-    {
-        return operator_parser.parse(pos, node, expected);
-    }
-};
-
-
-class ParserLogicalAndExpression : public IParserBase
-{
-private:
-    ParserVariableArityOperatorList operator_parser {"AND", "and", std::make_unique<ParserLogicalNotExpression>()};
-
-protected:
-    const char * getName() const override { return "logical-AND expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
-    {
-        return operator_parser.parse(pos, node, expected);
-    }
-};
-
-
-class ParserLogicalOrExpression : public IParserBase
-{
-private:
-    ParserVariableArityOperatorList operator_parser {"OR", "or", std::make_unique<ParserLogicalAndExpression>()};
-
-protected:
-    const char * getName() const override { return "logical-OR expression"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
-    {
-        return operator_parser.parse(pos, node, expected);
-    }
-};
-
-
-/** An expression with ternary operator.
-  * For example, a = 1 ? b + 1 : c * 2.
-  */
-class ParserTernaryOperatorExpression : public IParserBase
-{
-private:
-    ParserLogicalOrExpression elem_parser;
-
-protected:
-    const char * getName() const override { return "expression with ternary operator"; }
-
-    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
-};
-
-
-class ParserLambdaExpression : public IParserBase
-{
-private:
-    ParserTernaryOperatorExpression elem_parser;
-
 protected:
     const char * getName() const override { return "lambda expression"; }
 
@@ -457,9 +182,6 @@ protected:
 // It's used to parse expressions in table function.
 class ParserTableFunctionExpression : public IParserBase
 {
-private:
-    ParserLambdaExpression elem_parser;
-
 protected:
     const char * getName() const override { return "table function expression"; }
 
@@ -467,13 +189,10 @@ protected:
 };
 
 
-using ParserExpression = ParserLambdaExpression;
-
-
 class ParserExpressionWithOptionalAlias : public IParserBase
 {
 public:
-    explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword, bool is_table_function = false);
+    explicit ParserExpressionWithOptionalAlias(bool allow_alias_without_as_keyword_, bool is_table_function_ = false);
 
 protected:
     ParserPtr impl;
diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp
index f1557076a07..65740f290b5 100644
--- a/src/Parsers/IAST.cpp
+++ b/src/Parsers/IAST.cpp
@@ -138,14 +138,28 @@ void IAST::updateTreeHashImpl(SipHash & hash_state) const
 }
 
 
-size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const
+size_t IAST::checkDepthImpl(size_t max_depth) const
 {
-    size_t res = level + 1;
-    for (const auto & child : children)
+    std::vector<std::pair<ASTPtr, size_t>> stack;
+    stack.reserve(children.size());
+
+    for (const auto & i: children)
+        stack.push_back({i, 1});
+
+    size_t res = 0;
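+
+    /// (illustrative) For a chain root -> a -> b -> c the stack holds {a, 1}, {b, 2},
+    /// {c, 3} in turn, so a maximum depth of 3 is found without deep recursion.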
+
+    while (!stack.empty())
     {
-        if (level >= max_depth)
+        auto top = stack.back();
+        stack.pop_back();
+
+        if (top.second >= max_depth)
             throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST);
-        res = std::max(res, child->checkDepthImpl(max_depth, level + 1));
+
+        res = std::max(res, top.second);
+
+        for (const auto & i: top.first->children)
+            stack.push_back({i, top.second + 1});
     }
 
     return res;
diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h
index e91d419acd8..bba878e4769 100644
--- a/src/Parsers/IAST.h
+++ b/src/Parsers/IAST.h
@@ -92,7 +92,7 @@ public:
       */
     size_t checkDepth(size_t max_depth) const
     {
-        return checkDepthImpl(max_depth, 0);
+        return checkDepthImpl(max_depth);
     }
 
     /** Get total number of tree elements
@@ -273,7 +273,7 @@ public:
     static const char * hilite_none;
 
 private:
-    size_t checkDepthImpl(size_t max_depth, size_t level) const;
+    size_t checkDepthImpl(size_t max_depth) const;
 
     /** Forward linked list of ASTPtr to delete.
       * Used in IAST destructor to avoid possible stack overflow.
diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp
new file mode 100644
index 00000000000..3a399bdccdb
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLFilter.cpp
@@ -0,0 +1,26 @@
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+bool ParserKQLFilter :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    String expr = getExprFromToken(pos);
+    ASTPtr where_expression;
+
+    Tokens token_filter(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos pos_filter(token_filter, pos.max_depth);
+    if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected))
+        return false;
+
+    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLFilter.h b/src/Parsers/Kusto/ParserKQLFilter.h
new file mode 100644
index 00000000000..19bb38a7fda
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLFilter.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLFilter : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL where"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp
new file mode 100644
index 00000000000..bb8e08fd378
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLimit.cpp
@@ -0,0 +1,29 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+bool ParserKQLLimit :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr limit_length;
+
+    auto expr = getExprFromToken(pos);
+
+    Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+
+    if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected))
+        return false;
+
+    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(limit_length));
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLLimit.h b/src/Parsers/Kusto/ParserKQLLimit.h
new file mode 100644
index 00000000000..1585805f0fc
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLLimit.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLLimit : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL limit"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp
new file mode 100644
index 00000000000..f8e4f9eaab0
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLOperators.cpp
@@ -0,0 +1,359 @@
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int SYNTAX_ERROR;
+}
+
+String KQLOperators::genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op)
+{
+    String new_expr;
+    Expected expected;
+    ParserToken s_lparen(TokenType::OpeningRoundBracket);
+
+    ++token_pos;
+    if (!s_lparen.ignore(token_pos, expected))
+        throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
+
+    auto haystack = tokens.back();
+
+    String logic_op = (kql_op == "has_all") ? " and " : " or ";
+
+    while (!token_pos->isEnd() && token_pos->type != TokenType::PipeMark && token_pos->type != TokenType::Semicolon)
+    {
+        auto tmp_arg = String(token_pos->begin, token_pos->end);
+        if (token_pos->type == TokenType::Comma)
+            new_expr = new_expr + logic_op;
+        else
+            new_expr = new_expr + ch_op + "(" + haystack + "," + tmp_arg + ")";
+
+        ++token_pos;
+        if (token_pos->type == TokenType::ClosingRoundBracket)
+            break;
+    }
+
+    tokens.pop_back();
+    return new_expr;
+}
+
+String KQLOperators::genInOpExpr(IParser::Pos & token_pos, String kql_op, String ch_op)
+{
+    String new_expr;
+
+    ParserToken s_lparen(TokenType::OpeningRoundBracket);
+
+    ASTPtr select;
+    Expected expected;
+
+    ++token_pos;
+    if (!s_lparen.ignore(token_pos, expected))
+        throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
+
+    --token_pos;
+    --token_pos;
+    return ch_op;
+}
+
+String KQLOperators::genHaystackOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos)
+{
+    String new_expr, left_wildcards, right_wildcards, left_space, right_space;
+
+    switch (wildcards_pos)
+    {
+        case WildcardsPos::none:
+            break;
+
+        case WildcardsPos::left:
+            left_wildcards = "%";
+            break;
+
+        case WildcardsPos::right:
+            right_wildcards = "%";
+            break;
+
+        case WildcardsPos::both:
+            left_wildcards = "%";
+            right_wildcards = "%";
+            break;
+    }
+
+    switch (space_pos)
+    {
+        case WildcardsPos::none:
+            break;
+
+        case WildcardsPos::left:
+            left_space = " ";
+            break;
+
+        case WildcardsPos::right:
+            right_space = " ";
+            break;
+
+        case WildcardsPos::both:
+            left_space = " ";
+            right_space = " ";
+            break;
+    }
+
+    ++token_pos;
+
+    if (!tokens.empty() && ((token_pos)->type == TokenType::StringLiteral || token_pos->type == TokenType::QuotedIdentifier))
+        new_expr = ch_op + "(" + tokens.back() + ", '" + left_wildcards + left_space + String(token_pos->begin + 1, token_pos->end - 1) + right_space + right_wildcards + "')";
+    else if (!tokens.empty() && ((token_pos)->type == TokenType::BareWord))
+    {
+        auto tmp_arg = String(token_pos->begin, token_pos->end);
+        new_expr = ch_op + "(" + tokens.back() + ", concat('" + left_wildcards + left_space + "', " + tmp_arg + ", '" + right_space + right_wildcards + "'))";
+    }
+    else
+        throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR);
+    tokens.pop_back();
+    return new_expr;
+}
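+
+/// For example (illustrative): with tokens = {"EventText"} and the KQL operator
+/// "contains", genHaystackOpExpr(tokens, pos, "contains", "ilike", WildcardsPos::both)
+/// consumes the literal 'bad' and returns the string "ilike(EventText, '%bad%')".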
+
+bool KQLOperators::convert(std::vector<String> & tokens, IParser::Pos & pos)
+{
+    auto begin = pos;
+
+    if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+    {
+        KQLOperatorValue op_value = KQLOperatorValue::none;
+
+        auto token = String(pos->begin, pos->end);
+
+        String op = token;
+        if (token == "!")
+        {
+            ++pos;
+            if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon)
+                throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR);
+            op = "!" + String(pos->begin, pos->end);
+        }
+        else if (token == "matches")
+        {
+            ++pos;
+            if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+            {
+                if (String(pos->begin, pos->end) == "regex")
+                    op += " regex";
+                else
+                    --pos;
+            }
+        }
+        else
+        {
+            op = token;
+        }
+
+        ++pos;
+        if (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
+        {
+            if (String(pos->begin, pos->end) == "~")
+                op += "~";
+            else
+                --pos;
+        }
+        else
+            --pos;
+
+        if (KQLOperator.find(op) == KQLOperator.end())
+        {
+            pos = begin;
+            return false;
+        }
+
+        op_value = KQLOperator[op];
+
+        String new_expr;
+
+        if (op_value == KQLOperatorValue::none)
+            tokens.push_back(op);
+        else
+        {
+            auto last_op = tokens.back();
+            auto last_pos = pos;
+
+            switch (op_value)
+            {
+                case KQLOperatorValue::contains:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::both);
+                    break;
+
+                case KQLOperatorValue::not_contains:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::both);
+                    break;
+
+                case KQLOperatorValue::contains_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "like", WildcardsPos::both);
+                    break;
+
+                case KQLOperatorValue::not_contains_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not like", WildcardsPos::both);
+                    break;
+
+                case KQLOperatorValue::endswith:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left);
+                    break;
+
+                case KQLOperatorValue::not_endswith:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left);
+                    break;
+
+                case KQLOperatorValue::endswith_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::not_endswith_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::equal:
+                    break;
+
+                case KQLOperatorValue::not_equal:
+                    break;
+
+                case KQLOperatorValue::equal_cs:
+                    new_expr = "==";
+                    break;
+
+                case KQLOperatorValue::not_equal_cs:
+                    new_expr = "!=";
+                    break;
+
+                case KQLOperatorValue::has:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "hasTokenCaseInsensitive", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::not_has:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not hasTokenCaseInsensitive", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::has_all:
+                    new_expr = genHasAnyAllOpExpr(tokens, pos, "has_all", "hasTokenCaseInsensitive");
+                    break;
+
+                case KQLOperatorValue::has_any:
+                    new_expr = genHasAnyAllOpExpr(tokens, pos, "has_any", "hasTokenCaseInsensitive");
+                    break;
+
+                case KQLOperatorValue::has_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "hasToken", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::not_has_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not hasToken", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::hasprefix:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right);
+                    new_expr += " or ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::left);
+                    break;
+
+                case KQLOperatorValue::not_hasprefix:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right);
+                    new_expr += " and ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::left);
+                    break;
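+
+                /// (illustrative) hasprefix produces two checks joined with "or":
+                /// x hasprefix 'a' becomes ilike(x, 'a%') or ilike(x, '% a%'), i.e.
+                /// the prefix may start the whole string or any token inside it.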
+
+                case KQLOperatorValue::hasprefix_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none);
+                    new_expr += " or ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::left);
+                    break;
+
+                case KQLOperatorValue::not_hasprefix_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none);
+                    new_expr += " and ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::left);
+                    break;
+
+                case KQLOperatorValue::hassuffix:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::left);
+                    new_expr += " or ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "ilike", WildcardsPos::both, WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::not_hassuffix:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::left);
+                    new_expr += " and ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "not ilike", WildcardsPos::both, WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::hassuffix_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "endsWith", WildcardsPos::none);
+                    new_expr += " or ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "like", WildcardsPos::both, WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::not_hassuffix_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not endsWith", WildcardsPos::none);
+                    new_expr += " and ";
+                    tokens.push_back(last_op);
+                    new_expr += genHaystackOpExpr(tokens, last_pos, op, "not like", WildcardsPos::both, WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::in_cs:
+                    new_expr = genInOpExpr(pos, op, "in");
+                    break;
+
+                case KQLOperatorValue::not_in_cs:
+                    new_expr = genInOpExpr(pos, op, "not in");
+                    break;
+
+                case KQLOperatorValue::in:
+                    break;
+
+                case KQLOperatorValue::not_in:
+                    break;
+
+                case KQLOperatorValue::matches_regex:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "match", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::startswith:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "ilike", WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::not_startswith:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not ilike", WildcardsPos::right);
+                    break;
+
+                case KQLOperatorValue::startswith_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "startsWith", WildcardsPos::none);
+                    break;
+
+                case KQLOperatorValue::not_startswith_cs:
+                    new_expr = genHaystackOpExpr(tokens, pos, op, "not startsWith", WildcardsPos::none);
+                    break;
+
+                default:
+                    break;
+            }
+
+            tokens.push_back(new_expr);
+        }
+        return true;
+    }
+    pos = begin;
+    return false;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h
new file mode 100644
index 00000000000..9796ae10c07
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLOperators.h
@@ -0,0 +1,106 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class KQLOperators
+{
+public:
+    bool convert(std::vector<String> & tokens, IParser::Pos & pos);
+
+protected:
+    enum class WildcardsPos : uint8_t
+    {
+        none,
+        left,
+        right,
+        both
+    };
+
+    enum class KQLOperatorValue : uint16_t
+    {
+        none,
+        contains,
+        not_contains,
+        contains_cs,
+        not_contains_cs,
+        endswith,
+        not_endswith,
+        endswith_cs,
+        not_endswith_cs,
+        equal,        //=~
+        not_equal,    //!~
+        equal_cs,     //=
+        not_equal_cs, //!=
+        has,
+        not_has,
+        has_all,
+        has_any,
+        has_cs,
+        not_has_cs,
+        hasprefix,
+        not_hasprefix,
+        hasprefix_cs,
+        not_hasprefix_cs,
+        hassuffix,
+        not_hassuffix,
+        hassuffix_cs,
+        not_hassuffix_cs,
+        in_cs,     //in
+        not_in_cs, //!in
+        in,        //in~
+        not_in,    //!in~
+        matches_regex,
+        startswith,
+        not_startswith,
+        startswith_cs,
+        not_startswith_cs,
+    };
+
+    std::unordered_map<String, KQLOperatorValue> KQLOperator =
+    {
+        {"contains", KQLOperatorValue::contains},
+        {"!contains", KQLOperatorValue::not_contains},
+        {"contains_cs", KQLOperatorValue::contains_cs},
+        {"!contains_cs", KQLOperatorValue::not_contains_cs},
+        {"endswith", KQLOperatorValue::endswith},
+        {"!endswith", KQLOperatorValue::not_endswith},
+        {"endswith_cs", KQLOperatorValue::endswith_cs},
+        {"!endswith_cs", KQLOperatorValue::not_endswith_cs},
+        {"=~", KQLOperatorValue::equal},
+        {"!~", KQLOperatorValue::not_equal},
+        {"==", KQLOperatorValue::equal_cs},
+        {"!=", KQLOperatorValue::not_equal_cs},
+        {"has", KQLOperatorValue::has},
+        {"!has", KQLOperatorValue::not_has},
+        {"has_all", KQLOperatorValue::has_all},
+        {"has_any", KQLOperatorValue::has_any},
+        {"has_cs", KQLOperatorValue::has_cs},
+        {"!has_cs", KQLOperatorValue::not_has_cs},
+        {"hasprefix", KQLOperatorValue::hasprefix},
+        {"!hasprefix", KQLOperatorValue::not_hasprefix},
+        {"hasprefix_cs", KQLOperatorValue::hasprefix_cs},
+        {"!hasprefix_cs", KQLOperatorValue::not_hasprefix_cs},
+        {"hassuffix", KQLOperatorValue::hassuffix},
+        {"!hassuffix", KQLOperatorValue::not_hassuffix},
+        {"hassuffix_cs", KQLOperatorValue::hassuffix_cs},
+        {"!hassuffix_cs", KQLOperatorValue::not_hassuffix_cs},
+        {"in", KQLOperatorValue::in_cs},
+        {"!in", KQLOperatorValue::not_in_cs},
+        {"in~", KQLOperatorValue::in},
+        {"!in~", KQLOperatorValue::not_in},
+        {"matches regex", KQLOperatorValue::matches_regex},
+        {"startswith", KQLOperatorValue::startswith},
+        {"!startswith", KQLOperatorValue::not_startswith},
+        {"startswith_cs", KQLOperatorValue::startswith_cs},
+        {"!startswith_cs", KQLOperatorValue::not_startswith_cs},
+    };
+
+    static String genHaystackOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op, WildcardsPos wildcards_pos, WildcardsPos space_pos = WildcardsPos::none);
+    static String genInOpExpr(IParser::Pos & token_pos, String kql_op, String ch_op);
+    static String genHasAnyAllOpExpr(std::vector<String> & tokens, IParser::Pos & token_pos, String kql_op, String ch_op);
+};
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp
new file mode 100644
index 00000000000..e978323d821
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLProject.cpp
@@ -0,0 +1,25 @@
+#include
+#include
+#include
+
+namespace DB
+{
+
+bool ParserKQLProject :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
+{
+    ASTPtr select_expression_list;
+    String expr;
+
+    expr = getExprFromToken(pos);
+
+    Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
+    IParser::Pos new_pos(tokens, pos.max_depth);
+
+    if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected))
+        return false;
+
+    node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list));
+
+    return true;
+}
+
+}
diff --git a/src/Parsers/Kusto/ParserKQLProject.h b/src/Parsers/Kusto/ParserKQLProject.h
new file mode 100644
index 00000000000..b64675beed0
--- /dev/null
+++ b/src/Parsers/Kusto/ParserKQLProject.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include
+#include
+
+namespace DB
+{
+
+class ParserKQLProject : public ParserKQLBase
+{
+protected:
+    const char * getName() const override { return "KQL project"; }
+    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
+}; + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp new file mode 100644 index 00000000000..04ee36705a9 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -0,0 +1,342 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +String ParserKQLBase :: getExprFromToken(const String & text, const uint32_t & max_depth) +{ + Tokens tokens(text.c_str(), text.c_str() + text.size()); + IParser::Pos pos(tokens, max_depth); + + return getExprFromToken(pos); +} + +String ParserKQLBase :: getExprFromPipe(Pos & pos) +{ + uint16_t bracket_count = 0; + auto begin = pos; + auto end = pos; + while (!end->isEnd() && end->type != TokenType::Semicolon) + { + if (end->type == TokenType::OpeningRoundBracket) + ++bracket_count; + + if (end->type == TokenType::ClosingRoundBracket) + --bracket_count; + + if (end->type == TokenType::PipeMark && bracket_count == 0) + break; + + ++end; + } + --end; + return String(begin->begin, end->end); +} + +String ParserKQLBase :: getExprFromToken(Pos & pos) +{ + String res; + std::vector tokens; + String alias; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + String token = String(pos->begin,pos->end); + + if (token == "=") + { + ++pos; + if (String(pos->begin,pos->end) != "~") + { + alias = tokens.back(); + tokens.pop_back(); + } + --pos; + } + else if (!KQLOperators().convert(tokens,pos)) + { + tokens.push_back(token); + } + + if (pos->type == TokenType::Comma && !alias.empty()) + { + tokens.pop_back(); + tokens.push_back("AS"); + tokens.push_back(alias); + tokens.push_back(","); + alias.clear(); + } + ++pos; + } + + if (!alias.empty()) + { + tokens.push_back("AS"); + tokens.push_back(alias); + } + + for (auto const &token : tokens) + res = res.empty()?
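+ // (Joins the collected tokens back into one expression string. Combined
+ // with the alias handling above, a KQL fragment like "FullName = f(FirstName)"
+ // comes out as "f(FirstName) AS FullName"; names here are illustrative only.)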
token : res +" " + token; + return res; +} + +std::unique_ptr ParserKQLQuery::getOperator(String & op_name) +{ + if (op_name == "filter" || op_name == "where") + return std::make_unique(); + else if (op_name == "limit" || op_name == "take") + return std::make_unique(); + else if (op_name == "project") + return std::make_unique(); + else if (op_name == "sort by" || op_name == "order by") + return std::make_unique(); + else if (op_name == "summarize") + return std::make_unique(); + else if (op_name == "table") + return std::make_unique(); + else + return nullptr; +} + +bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + struct KQLOperatorDataFlowState + { + String operator_name; + bool need_input; + bool gen_output; + int8_t backspace_steps; // how many steps to last token of previous pipe + }; + + auto select_query = std::make_shared(); + node = select_query; + ASTPtr tables; + + std::unordered_map kql_parser = + { + { "filter", {"filter", false, false, 3}}, + { "where", {"filter", false, false, 3}}, + { "limit", {"limit", false, true, 3}}, + { "take", {"limit", false, true, 3}}, + { "project", {"project", false, false, 3}}, + { "sort by", {"order by", false, false, 4}}, + { "order by", {"order by", false, false, 4}}, + { "table", {"table", false, false, 3}}, + { "summarize", {"summarize", true, true, 3}} + }; + + std::vector> operation_pos; + + String table_name(pos->begin, pos->end); + + operation_pos.push_back(std::make_pair("table", pos)); + ++pos; + uint16_t bracket_count = 0; + + while (!pos->isEnd() && pos->type != TokenType::Semicolon) + { + if (pos->type == TokenType::OpeningRoundBracket) + ++bracket_count; + if (pos->type == TokenType::OpeningRoundBracket) + --bracket_count; + + if (pos->type == TokenType::PipeMark && bracket_count == 0) + { + ++pos; + String kql_operator(pos->begin, pos->end); + if (kql_operator == "order" || kql_operator == "sort") + { + ++pos; + ParserKeyword s_by("by"); + if (s_by.ignore(pos,expected)) + { + kql_operator = "order by"; + --pos; + } + } + if (pos->type != TokenType::BareWord || kql_parser.find(kql_operator) == kql_parser.end()) + return false; + ++pos; + operation_pos.push_back(std::make_pair(kql_operator, pos)); + } + else + ++pos; + } + + auto kql_operator_str = operation_pos.back().first; + auto npos = operation_pos.back().second; + if (!npos.isValid()) + return false; + + auto kql_operator_p = getOperator(kql_operator_str); + + if (!kql_operator_p) + return false; + + if (operation_pos.size() == 1) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else if (operation_pos.size() == 2 && operation_pos.front().first == "table") + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + npos = operation_pos.front().second; + if (!ParserKQLTable().parse(npos, node, expected)) + return false; + } + else + { + String project_clause, order_clause, where_clause, limit_clause; + auto last_pos = operation_pos.back().second; + auto last_op = operation_pos.back().first; + + auto set_main_query_clause =[&](String & op, Pos & op_pos) + { + auto op_str = ParserKQLBase::getExprFromPipe(op_pos); + if (op == "project") + project_clause = op_str; + else if (op == "where" || op == "filter") + where_clause = where_clause.empty() ? std::format("({})", op_str) : where_clause + std::format("AND ({})", op_str); + else if (op == "limit" || op == "take") + limit_clause = op_str; + else if (op == "order by" || op == "sort by") + order_clause = order_clause.empty() ? 
op_str : order_clause + "," + op_str; + }; + + set_main_query_clause(last_op, last_pos); + + operation_pos.pop_back(); + + if (kql_parser[last_op].need_input) + { + if (!kql_operator_p->parse(npos, node, expected)) + return false; + } + else + { + while (!operation_pos.empty()) + { + auto prev_op = operation_pos.back().first; + auto prev_pos = operation_pos.back().second; + + if (kql_parser[prev_op].gen_output) + break; + if (!project_clause.empty() && prev_op == "project") + break; + set_main_query_clause(prev_op, prev_pos); + operation_pos.pop_back(); + last_op = prev_op; + last_pos = prev_pos; + } + } + + if (!operation_pos.empty()) + { + for (auto i = 0; i < kql_parser[last_op].backspace_steps; ++i) + --last_pos; + + String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); + Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); + IParser::Pos pos_subquery(token_subquery, pos.max_depth); + + if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) + return false; + select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + } + else + { + if (!ParserKQLTable().parse(last_pos, node, expected)) + return false; + } + + auto set_query_clause = [&](String op_str, String op_clause) + { + auto op_parser = getOperator(op_str); + if (op_parser) + { + Tokens token_clause(op_clause.c_str(), op_clause.c_str() + op_clause.size()); + IParser::Pos pos_clause(token_clause, pos.max_depth); + if (!op_parser->parse(pos_clause, node, expected)) + return false; + } + return true; + }; + + if (!select_query->select()) + { + if (project_clause.empty()) + project_clause = "*"; + if (!set_query_clause("project", project_clause)) + return false; + } + + if (!order_clause.empty()) + if (!set_query_clause("order by", order_clause)) + return false; + + if (!where_clause.empty()) + if (!set_query_clause("where", where_clause)) + return false; + + if (!limit_clause.empty()) + if (!set_query_clause("limit", limit_clause)) + return false; + return true; + } + + if (!select_query->select()) + { + auto expr = String("*"); + Tokens tokens(expr.c_str(), expr.c_str()+expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + if (!std::make_unique()->parse(new_pos, node, expected)) + return false; + } + + return true; +} + +bool ParserKQLSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_node; + + if (!ParserKQLTaleFunction().parse(pos, select_node, expected)) + return false; + + ASTPtr node_subquery = std::make_shared(); + node_subquery->children.push_back(select_node); + + ASTPtr node_table_expr = std::make_shared(); + node_table_expr->as()->subquery = node_subquery; + + node_table_expr->children.emplace_back(node_subquery); + + ASTPtr node_table_in_select_query_element = std::make_shared(); + node_table_in_select_query_element->as()->table_expression = node_table_expr; + + ASTPtr res = std::make_shared(); + + res->children.emplace_back(node_table_in_select_query_element); + + node = res; + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h new file mode 100644 index 00000000000..42f5f84f031 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include + +namespace DB +{ +class ParserKQLBase : public IParserBase +{ +public: + static String getExprFromToken(Pos & pos); + static String getExprFromPipe(Pos & pos); + static String getExprFromToken(const String & text, const uint32_t &
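+ // Re-tokenizes `text` under the same depth limit and delegates to the
+ // position-based overload; used where a rewritten clause string must be
+ // parsed again as a ClickHouse expression (e.g. in ParserKQLSummarize).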
max_depth); +}; + +class ParserKQLQuery : public IParserBase +{ + +protected: + static std::unique_ptr getOperator(String &op_name); + const char * getName() const override { return "KQL query"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserKQLSubquery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL subquery"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp new file mode 100644 index 00000000000..f7540d729fd --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSort :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + bool has_dir = false; + std::vector has_directions; + ParserOrderByExpressionList order_list; + ASTPtr order_expression_list; + + auto expr = getExprFromToken(pos); + + Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); + IParser::Pos new_pos(tokens, pos.max_depth); + + auto pos_backup = new_pos; + if (!order_list.parse(pos_backup, order_expression_list, expected)) + return false; + + while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon) + { + String tmp(new_pos->begin, new_pos->end); + if (tmp == "desc" or tmp == "asc") + has_dir = true; + + if (new_pos->type == TokenType::Comma) + { + has_directions.push_back(has_dir); + has_dir = false; + } + ++new_pos; + } + has_directions.push_back(has_dir); + + for (uint64_t i = 0; i < order_expression_list->children.size(); ++i) + { + if (!has_directions[i]) + { + auto *order_expr = order_expression_list->children[i]->as(); + order_expr->direction = -1; // default desc + if (!order_expr->nulls_direction_was_explicitly_specified) + order_expr->nulls_direction = -1; + else + order_expr->nulls_direction = order_expr->nulls_direction == 1 ? 
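+ // (KQL defaults to descending order, so columns without an explicit
+ // asc/desc are flipped here; the tests below expect
+ //   Customers | sort by FirstName  -->  ORDER BY FirstName DESC
+ // and an explicitly given nulls first/last is mirrored with the direction.)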
-1 : 1; + } + } + + node->as()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSort.h b/src/Parsers/Kusto/ParserKQLSort.h new file mode 100644 index 00000000000..d9afefc196c --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSort.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSort : public ParserKQLBase +{ +protected: + const char * getName() const override { return "KQL order by"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp new file mode 100644 index 00000000000..21e480234d3 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLStatement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithOutput query_with_output_p(end, allow_settings_after_format_in_insert); + ParserSetQuery set_p; + + bool res = query_with_output_p.parse(pos, node, expected) + || set_p.parse(pos, node, expected); + + return res; +} + +bool ParserKQLWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + + ASTPtr query; + bool parsed = kql_p.parse(pos, query, expected); + + if (!parsed) + return false; + + node = std::move(query); + return true; +} + +bool ParserKQLWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr kql_query; + + if (!ParserKQLQuery().parse(pos, kql_query, expected)) + return false; + + if (kql_query->as()) + { + node = std::move(kql_query); + return true; + } + + auto list_node = std::make_shared(); + list_node->children.push_back(kql_query); + + auto select_with_union_query = std::make_shared(); + node = select_with_union_query; + select_with_union_query->list_of_selects = list_node; + select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + + return true; +} + +bool ParserKQLTaleFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKQLWithUnionQuery kql_p; + ASTPtr select; + ParserToken s_lparen(TokenType::OpeningRoundBracket); + + auto begin = pos; + auto paren_count = 0 ; + String kql_statement; + + if (s_lparen.ignore(pos, expected)) + { + ++paren_count; + while (!pos->isEnd()) + { + if (pos->type == TokenType::ClosingRoundBracket) + --paren_count; + if (pos->type == TokenType::OpeningRoundBracket) + ++paren_count; + + if (paren_count == 0) + break; + + kql_statement = kql_statement + " " + String(pos->begin,pos->end); + ++pos; + } + + Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); + IParser::Pos pos_kql(token_kql, pos.max_depth); + + if (kql_p.parse(pos_kql, select, expected)) + { + node = select; + ++pos; + return true; + } + } + pos = begin; + return false; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLStatement.h b/src/Parsers/Kusto/ParserKQLStatement.h new file mode 100644 index 00000000000..ef44b2d6c8a --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLStatement.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +namespace DB +{ + +class ParserKQLStatement : public IParserBase +{ +private: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL Statement"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) 
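+ // Entry point for the KQL dialect: tries a full KQL query first and falls
+ // back to a plain SET statement, mirroring the layering of the SQL parser
+ // (statement -> with-output -> with-union -> query, see the classes below).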
override; +public: + explicit ParserKQLStatement(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + + +class ParserKQLWithOutput : public IParserBase +{ +protected: + const char * end; + bool allow_settings_after_format_in_insert; + const char * getName() const override { return "KQL with output"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +public: + explicit ParserKQLWithOutput(const char * end_, bool allow_settings_after_format_in_insert_ = false) + : end(end_) + , allow_settings_after_format_in_insert(allow_settings_after_format_in_insert_) + {} +}; + +class ParserKQLWithUnionQuery : public IParserBase +{ +protected: + const char * getName() const override { return "KQL query, possibly with UNION"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +class ParserKQLTaleFunction : public IParserBase +{ +protected: + const char * getName() const override { return "KQL() function"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp new file mode 100644 index 00000000000..75eacb1adbd --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +bool ParserKQLSummarize ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTPtr select_expression_list; + ASTPtr group_expression_list; + + String expr_aggregation; + String expr_groupby; + String expr_columns; + bool groupby = false; + + auto begin = pos; + auto pos_groupby = pos; + + while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon) + { + if (String(pos->begin, pos->end) == "by") + { + groupby = true; + auto end = pos; + --end; + expr_aggregation = begin <= end ? String(begin->begin, end->end) : ""; + pos_groupby = pos; + ++pos_groupby; + } + ++pos; + } + --pos; + if (groupby) + expr_groupby = String(pos_groupby->begin, pos->end); + else + expr_aggregation = begin <= pos ? String(begin->begin, pos->end) : ""; + + auto expr_aggregation_str = expr_aggregation.empty() ? "" : expr_aggregation +","; + expr_columns = groupby ? 
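+ // (Aggregations go into SELECT together with the by-columns, and the
+ // by-part additionally becomes GROUP BY; per the tests below:
+ //   Customers | summarize count() by Occupation
+ //   --> SELECT count(), Occupation FROM Customers GROUP BY Occupation)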
expr_aggregation_str + expr_groupby : expr_aggregation_str; + + String converted_columns = getExprFromToken(expr_columns, pos.max_depth); + + Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size()); + IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth); + + if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::SELECT, std::move(select_expression_list)); + + if (groupby) + { + String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth); + + Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size()); + IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth); + + if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected)) + return false; + node->as()->setExpression(ASTSelectQuery::Expression::GROUP_BY, std::move(group_expression_list)); + } + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLSummarize.h b/src/Parsers/Kusto/ParserKQLSummarize.h new file mode 100644 index 00000000000..1aad02705df --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLSummarize.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLSummarize : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL summarize"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.cpp b/src/Parsers/Kusto/ParserKQLTable.cpp new file mode 100644 index 00000000000..6356ad688b6 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.cpp @@ -0,0 +1,55 @@ +#include +#include +#include +#include +#include +#include +namespace DB +{ + +bool ParserKQLTable :: parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + std::unordered_set sql_keywords + ({ + "SELECT", + "INSERT", + "CREATE", + "ALTER", + "SYSTEM", + "SHOW", + "GRANT", + "REVOKE", + "ATTACH", + "CHECK", + "DESCRIBE", + "DESC", + "DETACH", + "DROP", + "EXISTS", + "KILL", + "OPTIMIZE", + "RENAME", + "SET", + "TRUNCATE", + "USE", + "EXPLAIN" + }); + + ASTPtr tables; + String table_name(pos->begin,pos->end); + String table_name_upcase(table_name); + + std::transform(table_name_upcase.begin(), table_name_upcase.end(),table_name_upcase.begin(), toupper); + + if (sql_keywords.find(table_name_upcase) != sql_keywords.end()) + return false; + + if (!ParserTablesInSelectQuery().parse(pos, tables, expected)) + return false; + + node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables)); + + return true; +} + +} diff --git a/src/Parsers/Kusto/ParserKQLTable.h b/src/Parsers/Kusto/ParserKQLTable.h new file mode 100644 index 00000000000..c67dcb15156 --- /dev/null +++ b/src/Parsers/Kusto/ParserKQLTable.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParserKQLTable : public ParserKQLBase +{ + +protected: + const char * getName() const override { return "KQL Table"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index 747a13d46f7..892c0ad4718 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -338,7 +338,7 @@ Token Lexer::nextTokenImpl() ++pos; if (pos < end && *pos == '|') return Token(TokenType::Concatenation, token_begin, ++pos); - return 
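+ // (A single '|' used to lex as an error token; it now becomes PipeMark so
+ // KQL pipe stages can be detected, while '||' handled just above stays
+ // Concatenation.)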
Token(TokenType::ErrorSinglePipeMark, token_begin, pos); + return Token(TokenType::PipeMark, token_begin, pos); } case '@': { diff --git a/src/Parsers/Lexer.h b/src/Parsers/Lexer.h index ec472fb1a36..0c439ca0677 100644 --- a/src/Parsers/Lexer.h +++ b/src/Parsers/Lexer.h @@ -51,6 +51,7 @@ namespace DB M(Greater) \ M(LessOrEquals) \ M(GreaterOrEquals) \ + M(PipeMark) \ M(Concatenation) /** String concatenation operator: || */ \ \ M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \ diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp index 08df6d8da7a..2b3cf98a8a7 100644 --- a/src/Parsers/ParserCreateFunctionQuery.cpp +++ b/src/Parsers/ParserCreateFunctionQuery.cpp @@ -20,7 +20,7 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp ParserKeyword s_on("ON"); ParserIdentifier function_name_p; ParserKeyword s_as("AS"); - ParserLambdaExpression lambda_p; + ParserExpression lambda_p; ASTPtr function_name; ASTPtr function_core; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..fc90f9ce3ed 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -152,7 +152,7 @@ bool ParserConstraintDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserKeyword s_assume("ASSUME"); ParserIdentifier name_p; - ParserLogicalOrExpression expression_p; + ParserExpression expression_p; ASTPtr name; ASTPtr expr; @@ -858,8 +858,8 @@ bool ParserCreateWindowViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & ParserStorage storage_p; ParserStorage storage_inner; ParserTablePropertiesDeclarationList table_properties_p; - ParserIntervalOperatorExpression watermark_p; - ParserIntervalOperatorExpression lateness_p; + ParserExpression watermark_p; + ParserExpression lateness_p; ParserSelectWithUnionQuery select_p; ASTPtr table; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index f56e0a4c3a0..b5480fa6eae 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -134,7 +134,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; ParserKeyword s_collate{"COLLATE"}; - ParserTernaryOperatorExpression expr_parser; + ParserExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; ParserCodec codec_parser; diff --git a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp index 44bb7fb6057..9910c73e541 100644 --- a/src/Parsers/ParserDictionaryAttributeDeclaration.cpp +++ b/src/Parsers/ParserDictionaryAttributeDeclaration.cpp @@ -20,7 +20,7 @@ bool ParserDictionaryAttributeDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_is_object_id{"IS_OBJECT_ID"}; ParserLiteral default_parser; ParserArrayOfLiterals array_literals_parser; - ParserTernaryOperatorExpression expression_parser; + ParserExpression expression_parser; /// mandatory attribute name ASTPtr name; diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 87630777fa6..4839ce73614 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 
8137093b990..ef39df8ca52 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -226,7 +226,7 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec else if (ParserKeyword("ON").ignore(pos, expected)) { /// OR is operator with lowest priority, so start parsing from it. - if (!ParserLogicalOrExpression().parse(pos, table_join->on_expression, expected)) + if (!ParserExpression().parse(pos, table_join->on_expression, expected)) return false; } else diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 5b6d49e2741..b452bd27642 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,185 @@ INSTANTIATE_TEST_SUITE_P(ParserAttachUserQuery, ParserTest, "^$" } }))); + +INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, + ::testing::Combine( + ::testing::Values(std::make_shared()), + ::testing::ValuesIn(std::initializer_list{ + { + "Customers", + "SELECT *\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | limit 3", + "SELECT\n FirstName,\n LastName,\n Occupation\nFROM Customers\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 1 | take 3", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 1\n)\nLIMIT 3" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | take 1", + "SELECT *\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)\nLIMIT 1" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName", + "SELECT\n FirstName,\n LastName\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + }, + { + "Customers | project FirstName,LastName,Occupation | take 3 | project FirstName,LastName,Education", + "SELECT\n FirstName,\n LastName,\n Education\nFROM\n(\n SELECT\n FirstName,\n LastName,\n Occupation\n FROM Customers\n LIMIT 3\n)" + }, + { + "Customers | sort by FirstName desc", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | take 3 | order by FirstName desc", + "SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC" + }, + { + "Customers | sort by FirstName asc", + "SELECT *\nFROM Customers\nORDER BY FirstName ASC" + }, + { + "Customers | sort by FirstName", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC" + }, + { + "Customers | order by LastName", + "SELECT *\nFROM Customers\nORDER BY LastName DESC" + }, + { + "Customers | order by Age desc , FirstName asc ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" + }, + { + "Customers | order by Age asc , FirstName desc", + "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName | order by Age ", + "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC" + }, + { + "Customers | sort by FirstName nulls first", + "SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST" + }, + { + "Customers | sort by FirstName nulls last", + "SELECT *\nFROM Customers\nORDER BY FirstName 
DESC NULLS LAST" + }, + { + "Customers | where Occupation == 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation = 'Skilled Manual'" + }, + { + "Customers | where Occupation != 'Skilled Manual'", + "SELECT *\nFROM Customers\nWHERE Occupation != 'Skilled Manual'" + }, + { + "Customers |where Education in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education IN ('Bachelors', 'High School')" + }, + { + "Customers | where Education !in ('Bachelors','High School')", + "SELECT *\nFROM Customers\nWHERE Education NOT IN ('Bachelors', 'High School')" + }, + { + "Customers |where Education contains_cs 'Degree'", + "SELECT *\nFROM Customers\nWHERE Education LIKE '%Degree%'" + }, + { + "Customers | where Occupation startswith_cs 'Skil'", + "SELECT *\nFROM Customers\nWHERE startsWith(Occupation, 'Skil')" + }, + { + "Customers | where FirstName endswith_cs 'le'", + "SELECT *\nFROM Customers\nWHERE endsWith(FirstName, 'le')" + }, + { + "Customers | where Age == 26", + "SELECT *\nFROM Customers\nWHERE Age = 26" + }, + { + "Customers | where Age > 20 and Age < 30", + "SELECT *\nFROM Customers\nWHERE (Age > 20) AND (Age < 30)" + }, + { + "Customers | where Age > 30 | where Education == 'Bachelors'", + "SELECT *\nFROM Customers\nWHERE (Education = 'Bachelors') AND (Age > 30)" + }, + { + "Customers |summarize count() by Occupation", + "SELECT\n count(),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize sum(Age) by Occupation", + "SELECT\n sum(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize avg(Age) by Occupation", + "SELECT\n avg(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers|summarize min(Age) by Occupation", + "SELECT\n min(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers |summarize max(Age) by Occupation", + "SELECT\n max(Age),\n Occupation\nFROM Customers\nGROUP BY Occupation" + }, + { + "Customers | where FirstName contains 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%pet%'" + }, + { + "Customers | where FirstName !contains 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%pet%')" + }, + { + "Customers | where FirstName endswith 'er'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE '%er'" + }, + { + "Customers | where FirstName !endswith 'er'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE '%er')" + }, + { + "Customers | where Education has 'School'", + "SELECT *\nFROM Customers\nWHERE hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education !has 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasTokenCaseInsensitive(Education, 'School')" + }, + { + "Customers | where Education has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE hasToken(Education, 'School')" + }, + { + "Customers | where Education !has_cs 'School'", + "SELECT *\nFROM Customers\nWHERE NOT hasToken(Education, 'School')" + }, + { + "Customers | where FirstName matches regex 'P.*r'", + "SELECT *\nFROM Customers\nWHERE match(FirstName, 'P.*r')" + }, + { + "Customers | where FirstName startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE FirstName ILIKE 'pet%'" + }, + { + "Customers | where FirstName !startswith 'pet'", + "SELECT *\nFROM Customers\nWHERE NOT (FirstName ILIKE 'pet%')" + } +}))); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index d370a67bfcc..d68252679a7 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -26,6 +26,7 @@ 
#include #include #include +#include #include #include @@ -1056,6 +1057,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception{"Data type have to be specified for column " + backQuote(column_name) + " to add", ErrorCodes::BAD_ARGUMENTS}; + if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) + throw Exception{"Cannot add column " + backQuote(column_name) + ": this column name is reserved for lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN}; + if (command.codec) CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs); @@ -1240,6 +1245,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception{"Cannot rename to " + backQuote(command.rename_to) + ": column with this name already exists", ErrorCodes::DUPLICATE_COLUMN}; + if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast(table)) + throw Exception{"Cannot rename to " + backQuote(command.rename_to) + ": this column name is reserved for lightweight delete feature", + ErrorCodes::ILLEGAL_COLUMN}; + if (modified_columns.contains(column_name)) throw Exception{"Cannot rename and modify the same column " + backQuote(column_name) + " in a single ALTER query", ErrorCodes::NOT_IMPLEMENTED}; diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index ef6eb5e7a80..b2194020dca 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -35,7 +35,7 @@ namespace ErrorCodes } AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( - AsynchronousReaderPtr reader_, const ReadSettings & settings_, std::shared_ptr impl_) + IAsynchronousReader & reader_, const ReadSettings & settings_, std::shared_ptr impl_) : BufferWithOwnMemory(settings_.remote_fs_buffer_size) , reader(reader_) , priority(settings_.priority) @@ -72,7 +72,7 @@ std::future AsynchronousReadBufferFromHDFS::asyncRe request.offset = file_offset_of_buffer_end; request.priority = priority; request.ignore = 0; - return reader->submit(request); + return reader.submit(request); } void AsynchronousReadBufferFromHDFS::prefetch() diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h index a65e74a8c73..0cb4b9b7a74 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -24,7 +24,7 @@ class AsynchronousReadBufferFromHDFS : public BufferWithOwnMemory impl_); @@ -51,7 +51,7 @@ private: std::future asyncReadInto(char * data, size_t size); - AsynchronousReaderPtr reader; + IAsynchronousReader & reader; Int32 priority; std::shared_ptr impl; std::future prefetch_future; diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 5245bc89e0c..7f04d8b85d4 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -223,54 +223,69 @@ void DataPartStorageOnDisk::remove( /// NOTE relative_path can contain not only part name itself, but also some prefix like /// "moving/all_1_1_1" or "detached/all_2_3_5". We should handle this case more properly. 
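+ /// (The part directory is renamed to a "delete_tmp_" prefixed name before
+ /// removal so that an interrupted delete can be retried: on retry the
+ /// prefix is already present, has_delete_prefix below is true, and the
+ /// rename plus the stale-destination cleanup are skipped.)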
- if (part_dir_without_slash.has_parent_path()) - { - auto parent_path = part_dir_without_slash.parent_path(); - if (parent_path == "detached") - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to remove detached part {} with path {} in remove function. It shouldn't happen", part_dir, root_path); - - part_dir_without_slash = parent_path / ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); - } - else - { - part_dir_without_slash = ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); - } + /// File might be already renamed on previous try + bool has_delete_prefix = part_dir_without_slash.filename().string().starts_with("delete_tmp_"); + std::optional can_remove_description; + auto disk = volume->getDisk(); fs::path to = fs::path(root_path) / part_dir_without_slash; - std::optional can_remove_description; - - auto disk = volume->getDisk(); - if (disk->exists(to)) + if (!has_delete_prefix) { - LOG_WARNING(log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); + if (part_dir_without_slash.has_parent_path()) + { + auto parent_path = part_dir_without_slash.parent_path(); + if (parent_path == "detached") + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Trying to remove detached part {} with path {} in remove function. It shouldn't happen", + part_dir, + root_path); + + part_dir_without_slash = parent_path / ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); + } + else + { + part_dir_without_slash = ("delete_tmp_" + std::string{part_dir_without_slash.filename()}); + } + + to = fs::path(root_path) / part_dir_without_slash; + + if (disk->exists(to)) + { + LOG_WARNING(log, "Directory {} (to which part must be renamed before removing) already exists. " + "Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); + try + { + can_remove_description.emplace(can_remove_callback()); + disk->removeSharedRecursive( + fs::path(to) / "", !can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove); + } + catch (...) + { + LOG_ERROR( + log, "Cannot recursively remove directory {}. Exception: {}", fullPath(disk, to), getCurrentExceptionMessage(false)); + throw; + } + } + try { - can_remove_description.emplace(can_remove_callback()); - disk->removeSharedRecursive(fs::path(to) / "", !can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove); + disk->moveDirectory(from, to); + onRename(root_path, part_dir_without_slash); } - catch (...) + catch (const fs::filesystem_error & e) { - LOG_ERROR(log, "Cannot recursively remove directory {}. Exception: {}", fullPath(disk, to), getCurrentExceptionMessage(false)); + if (e.code() == std::errc::no_such_file_or_directory) + { + LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. " + "Most likely this is due to manual removing. This should be discouraged. Ignoring.", fullPath(disk, to)); + return; + } throw; } } - try - { - disk->moveDirectory(from, to); - onRename(root_path, part_dir_without_slash); - } - catch (const fs::filesystem_error & e) - { - if (e.code() == std::errc::no_such_file_or_directory) - { - LOG_ERROR(log, "Directory {} (part to remove) doesn't exist or one of nested files has gone. Most likely this is due to manual removing. This should be discouraged. 
Ignoring.", fullPath(disk, to)); - return; - } - throw; - } - if (!can_remove_description) can_remove_description.emplace(can_remove_callback()); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index ba4979e57f2..7f91ffee1fe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int REPLICA_STATUS_CHANGED; } ReplicatedMergeTreeAttachThread::ReplicatedMergeTreeAttachThread(StorageReplicatedMergeTree & storage_) @@ -54,6 +55,8 @@ void ReplicatedMergeTreeAttachThread::run() { if (const auto * coordination_exception = dynamic_cast(&e)) needs_retry = Coordination::isHardwareError(coordination_exception->code); + else if (e.code() == ErrorCodes::REPLICA_STATUS_CHANGED) + needs_retry = true; if (needs_retry) { @@ -84,14 +87,14 @@ void ReplicatedMergeTreeAttachThread::run() void ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const String & replica_path) { - /// Since 20.4 and until 22.9 "/metadata" and "/metadata_version" nodes were created on replica startup. + /// Since 20.4 and until 22.9 "/metadata" node was created on replica startup and "/metadata_version" was created on ALTER. /// Since 21.12 we could use "/metadata" to check if replica is dropped (see StorageReplicatedMergeTree::dropReplica), /// but it did not work correctly, because "/metadata" node was re-created on server startup. /// Since 22.9 we do not recreate these nodes and use "/host" to check if replica is dropped. String replica_metadata; const bool replica_metadata_exists = zookeeper->tryGet(replica_path + "/metadata", replica_metadata); - if (!replica_metadata_exists || replica_metadata.empty() || !zookeeper->exists(replica_path + "/metadata_version")) + if (!replica_metadata_exists || replica_metadata.empty()) { throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Upgrade from 20.3 and older to 22.9 and newer " "should be done through an intermediate version (failed to get metadata or metadata_version for {}," @@ -139,11 +142,36 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + String replica_metadata_version; + const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); + if (replica_metadata_version_exists) + { + storage.metadata_version = parse(replica_metadata_version); + } + else + { + /// Table was created before 20.4 and was never altered, + /// let's initialize replica metadata version from global metadata version. 
+ Coordination::Stat table_metadata_version_stat; + zookeeper->get(zookeeper_path + "/metadata", &table_metadata_version_stat); + + Coordination::Requests ops; + ops.emplace_back(zkutil::makeCheckRequest(zookeeper_path + "/metadata", table_metadata_version_stat.version)); + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", toString(table_metadata_version_stat.version), zkutil::CreateMode::Persistent)); + + Coordination::Responses res; + auto code = zookeeper->tryMulti(ops, res); + + if (code == Coordination::Error::ZBADVERSION) + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Failed to initialize metadata_version " + "because table was concurrently altered, will retry"); + + zkutil::KeeperMultiException::check(code, ops, res); + } + storage.checkTableStructure(replica_path, metadata_snapshot); storage.checkParts(skip_sanity_checks); - storage.metadata_version = parse(zookeeper->get(replica_path + "/metadata_version")); - /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. storage.clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index f6b110bbad0..28061aaaf48 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -316,6 +316,36 @@ StorageKeeperMap::StorageKeeperMap( for (size_t i = 0; i < 1000; ++i) { + std::string stored_metadata_string; + auto exists = client->tryGet(metadata_path, stored_metadata_string); + + if (exists) + { + // this requires same name for columns + // maybe we can do a smarter comparison for columns and primary key expression + if (stored_metadata_string != metadata_string) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Path {} is already used but the stored table definition doesn't match. 
Stored metadata: {}", + root_path, + stored_metadata_string); + + auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); + + // tables_path was removed with drop + if (code == Coordination::Error::ZNONODE) + { + LOG_INFO(log, "Metadata nodes were removed by another server, will retry"); + continue; + } + else if (code != Coordination::Error::ZOK) + { + throw zkutil::KeeperException(code, "Failed to create table on path {} because a table with same UUID already exists", root_path); + } + + return; + } + if (client->exists(dropped_path)) { LOG_INFO(log, "Removing leftover nodes"); @@ -342,45 +372,29 @@ StorageKeeperMap::StorageKeeperMap( } } - std::string stored_metadata_string; - auto exists = client->tryGet(metadata_path, stored_metadata_string); + Coordination::Requests create_requests + { + zkutil::makeCreateRequest(metadata_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(data_path, metadata_string, zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(tables_path, "", zkutil::CreateMode::Persistent), + zkutil::makeCreateRequest(table_path, "", zkutil::CreateMode::Persistent), + }; - if (exists) + Coordination::Responses create_responses; + auto code = client->tryMulti(create_requests, create_responses); + if (code == Coordination::Error::ZNODEEXISTS) { - // this requires same name for columns - // maybe we can do a smarter comparison for columns and primary key expression - if (stored_metadata_string != metadata_string) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Path {} is already used but the stored table definition doesn't match. Stored metadata: {}", - root_path, - stored_metadata_string); + LOG_INFO(log, "It looks like a table on path {} was created by another server at the same moment, will retry", root_path); + continue; } - else + else if (code != Coordination::Error::ZOK) { - auto code = client->tryCreate(metadata_path, metadata_string, zkutil::CreateMode::Persistent); - if (code == Coordination::Error::ZNODEEXISTS) - continue; - else if (code != Coordination::Error::ZOK) - throw Coordination::Exception(code, metadata_path); + zkutil::KeeperMultiException::check(code, create_requests, create_responses); } - client->createIfNotExists(tables_path, ""); - auto code = client->tryCreate(table_path, "", zkutil::CreateMode::Persistent); - - if (code == Coordination::Error::ZOK) - { - // metadata now should be guaranteed to exist because we added our UUID to the tables_path - client->createIfNotExists(data_path, ""); - table_is_valid = true; - return; - } - - if (code == Coordination::Error::ZNONODE) - LOG_INFO(log, "Metadata nodes were deleted in background, will retry"); - else - throw Coordination::Exception(code, table_path); + table_is_valid = true; + return; } throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cc0ace576ce..552035f478c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7451,8 +7451,9 @@ String StorageReplicatedMergeTree::getTableSharedID() const /// can be called only during table initialization std::lock_guard lock(table_shared_id_mutex); + bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; /// Can happen if table was partially initialized before drop by 
DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + if (maybe_has_metadata_in_zookeeper && table_shared_id == UUIDHelpers::Nil) createTableSharedID(); return toString(table_shared_id); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 4dc90be596a..e15956f78be 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -508,7 +508,7 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k LOG_TRACE( log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); - return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get()), download_thread_num); + return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), download_thread_num); } String StorageS3Source::getName() const @@ -617,7 +617,7 @@ public: s3_configuration_.rw_settings, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, - threadPoolCallbackRunner(IOThreadPool::get()), + threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), context->getWriteSettings()), compression_method, 3); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1e983d22321..f1c924a3448 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -351,7 +351,7 @@ namespace return wrapReadBufferWithCompressionMethod( std::make_unique( std::move(read_buffer_factory), - threadPoolCallbackRunner(IOThreadPool::get()), + threadPoolCallbackRunner(IOThreadPool::get(), "URLParallelRead"), download_threads), compression_method, settings.zstd_window_log_max); diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index efc4c0ed37b..6bc080045f8 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -2,54 +2,21 @@ # You can also regenerate it manually this way: # execute_process(COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/StorageSystemContributors.sh") -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set (CONFIG_BUILD "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemBuildOptions.generated.cpp") get_property (BUILD_COMPILE_DEFINITIONS DIRECTORY ${ClickHouse_SOURCE_DIR} PROPERTY COMPILE_DEFINITIONS) - get_property(TZDATA_VERSION GLOBAL PROPERTY TZDATA_VERSION_PROP) - -find_package(Git) -if(Git_FOUND) - # The commit's git hash, and whether the building workspace was dirty or not - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_HASH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # Git branch name - execute_process(COMMAND - "${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_BRANCH - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The date of the commit - SET(ENV{TZ} "UTC") - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - # The subject of the commit - execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${ClickHouse_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) -endif() - function(generate_system_build_options) include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) 
include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) - configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) + configure_file(StorageSystemBuildOptions.cpp.in StorageSystemBuildOptions.generated.cpp) endfunction() + generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) -list (APPEND storages_system_sources ${CONFIG_BUILD}) +list (APPEND storages_system_sources StorageSystemBuildOptions.generated.cpp) add_custom_target(generate-contributors ./StorageSystemContributors.sh @@ -78,6 +45,7 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) +include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) clickhouse_embed_binaries( TARGET information_schema_metadata RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in similarity index 98% rename from src/Storages/System/StorageSystemBuildOptions.generated.cpp.in rename to src/Storages/System/StorageSystemBuildOptions.cpp.in index dde90ce459a..117d97d2cfd 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -1,4 +1,4 @@ -// .cpp autogenerated by cmake +// File was generated by CMake const char * auto_config_build[] { diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp index b285857d684..17aa5479de5 100644 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ b/src/Storages/examples/async_read_buffer_from_hdfs.cpp @@ -25,7 +25,7 @@ int main() String path = "/path/to/hdfs/file"; ReadSettings settings = {}; auto in = std::make_unique(hdfs_namenode_url, path, *config, settings); - auto reader = IObjectStorage::getThreadPoolReader(); + auto & reader = IObjectStorage::getThreadPoolReader(); AsynchronousReadBufferFromHDFS buf(reader, {}, std::move(in)); String output; diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index ea682ce8c1f..7e14f5ca300 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -61,7 +61,7 @@ namespace DB ColumnsDescription /*cached_columns_*/) const { const Settings & settings = context_->getSettings(); - ParserLambdaExpression partition_by_parser; + ParserExpression partition_by_parser; ASTPtr partition_by_ast = parseQuery( partition_by_parser, "(" + partition_by_def + ")", diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a31f2298a58..5e69046915e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -131,6 +131,16 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, + "binary_aarch64_v80compat": { + "compiler": "clang-15-aarch64-v80compat", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "static_binary_name": "aarch64v80compat", + "libraries": "static", + "tidy": "disable", + "with_coverage": False, + }, "binary_freebsd": { "compiler": "clang-15-freebsd", "build_type": "", @@ -189,6 +199,7 @@ CI_CONFIG = { "binary_shared", "binary_darwin", "binary_aarch64", + "binary_aarch64_v80compat", "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", diff --git a/tests/ci/docker_images_check.py 
b/tests/ci/docker_images_check.py index 773f3ac1b57..fb7228628fd 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -164,7 +164,7 @@ def gen_versions( # The order is important, PR number is used as cache during the build versions = [str(pr_info.number), pr_commit_version] result_version = pr_commit_version - if pr_info.number == 0 and pr_info.base_name == "master": + if pr_info.number == 0 and pr_info.base_ref == "master": # First get the latest for cache versions.insert(0, "latest") diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 740cae5bc97..1848300e2f6 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -99,11 +99,11 @@ class TestDockerImageCheck(unittest.TestCase): def test_gen_version(self): pr_info = PRInfo(PRInfo.default_event.copy()) - pr_info.base_name = "anything-else" + pr_info.base_ref = "anything-else" versions, result_version = di.gen_versions(pr_info, None) self.assertEqual(versions, ["0", "0-HEAD"]) self.assertEqual(result_version, "0-HEAD") - pr_info.base_name = "master" + pr_info.base_ref = "master" versions, result_version = di.gen_versions(pr_info, None) self.assertEqual(versions, ["latest", "0", "0-HEAD"]) self.assertEqual(result_version, "0-HEAD") diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 77421ddac32..dc016a7eed9 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -132,9 +132,13 @@ class PRInfo: self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.pr_html_url = f"{repo_prefix}/pull/{self.number}" + # master or backport/xx.x/xxxxx - where the PR will be merged self.base_ref = github_event["pull_request"]["base"]["ref"] + # ClickHouse/ClickHouse self.base_name = github_event["pull_request"]["base"]["repo"]["full_name"] + # any_branch-name - the name of working branch name self.head_ref = github_event["pull_request"]["head"]["ref"] + # UserName/ClickHouse or ClickHouse/ClickHouse self.head_name = github_event["pull_request"]["head"]["repo"]["full_name"] self.body = github_event["pull_request"]["body"] self.labels = { diff --git a/tests/ci/worker/ubuntu_ami_for_ci.sh b/tests/ci/worker/ubuntu_ami_for_ci.sh index c5bc090d8d8..f2d0a7f0300 100644 --- a/tests/ci/worker/ubuntu_ami_for_ci.sh +++ b/tests/ci/worker/ubuntu_ami_for_ci.sh @@ -3,7 +3,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.293.0 +export RUNNER_VERSION=2.296.2 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b0b03daf3b5..12f85a5adbf 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -865,6 +865,12 @@ class TestCase: stdout=PIPE, universal_newlines=True, ).communicate()[0] + if diff.startswith("Binary files "): + diff += "Content of stdout:\n===================\n" + file = open(self.stdout_file, "r") + diff += str(file.read()) + file.close() + diff += "===================" description += f"\n{diff}\n" if debug_log: description += "\n" diff --git a/tests/integration/helpers/keeper_utils.py b/tests/integration/helpers/keeper_utils.py new file mode 100644 index 00000000000..681407e5e8c --- /dev/null +++ b/tests/integration/helpers/keeper_utils.py @@ -0,0 +1,41 @@ +import socket +import time + + +def get_keeper_socket(cluster, node, port=9181): + hosts = cluster.get_instance_ip(node.name) + client = socket.socket() + client.settimeout(10) + client.connect((hosts, port)) + return client + + +def send_4lw_cmd(cluster, node, cmd="ruok", port=9181): + 
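+    """Send a four-letter-word command (e.g. "mntr" or "ruok") to the Keeper
+    port of `node` and return the raw reply; the wait_* helpers below poll
+    "mntr" until the instance starts or stops serving requests."""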
client = None + try: + client = get_keeper_socket(cluster, node, port) + client.send(cmd.encode()) + data = client.recv(100_000) + data = data.decode() + return data + finally: + if client is not None: + client.close() + + +NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" + + +def wait_until_connected(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) == NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_until_quorum_lost(cluster, node, port=9181): + while send_4lw_cmd(cluster, node, "mntr", port) != NOT_SERVING_REQUESTS_ERROR_MSG: + time.sleep(0.1) + + +def wait_nodes(cluster, nodes): + for node in nodes: + wait_until_connected(cluster, node) diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index ae6b0085094..6ec307f7082 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -15,6 +15,7 @@ node1 = cluster.add_instance( def started_cluster(): try: cluster.start() + yield cluster finally: cluster.shutdown() diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml deleted file mode 100644 index c1d38a1de52..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/enable_keeper.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - - diff --git a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml b/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml deleted file mode 100644 index ebb0d98ddf4..00000000000 --- a/tests/integration/test_keeper_clickhouse_hard_restart/configs/keeper_conf.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - node1 - 9181 - - - diff --git a/tests/integration/test_keeper_force_recovery/test.py b/tests/integration/test_keeper_force_recovery/test.py index f3bb0ca56e3..f7c3787b4d8 100644 --- a/tests/integration/test_keeper_force_recovery/test.py +++ b/tests/integration/test_keeper_force_recovery/test.py @@ -2,6 +2,7 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time @@ -62,37 +63,6 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): while zk.retry(zk.exists, path) is None: time.sleep(0.1) @@ -104,9 +74,6 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: @@ -114,7 +81,7 @@ def 
test_cluster_recovery(started_cluster): for node in nodes[CLUSTER_SIZE:]: node.stop_clickhouse() - wait_nodes(nodes[:CLUSTER_SIZE]) + keeper_utils.wait_nodes(cluster, nodes[:CLUSTER_SIZE]) node_zks = [get_fake_zk(node.name) for node in nodes[:CLUSTER_SIZE]] @@ -152,7 +119,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -167,8 +134,7 @@ def test_cluster_recovery(started_cluster): node.stop_clickhouse() # wait for node1 to lose quorum - while send_4lw_cmd(nodes[0].name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, nodes[0]) nodes[0].copy_file_to_container( os.path.join(CONFIG_DIR, "recovered_keeper1.xml"), @@ -177,9 +143,15 @@ def test_cluster_recovery(started_cluster): nodes[0].query("SYSTEM RELOAD CONFIG") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG - send_4lw_cmd(nodes[0].name, "rcvr") - assert send_4lw_cmd(nodes[0].name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) + keeper_utils.send_4lw_cmd(cluster, nodes[0], "rcvr") + assert ( + keeper_utils.send_4lw_cmd(cluster, nodes[0], "mntr") + == keeper_utils.NOT_SERVING_REQUESTS_ERROR_MSG + ) # add one node to restore the quorum nodes[CLUSTER_SIZE].copy_file_to_container( @@ -191,10 +163,10 @@ def test_cluster_recovery(started_cluster): ) nodes[CLUSTER_SIZE].start_clickhouse() - wait_until_connected(nodes[CLUSTER_SIZE].name) + keeper_utils.wait_until_connected(cluster, nodes[CLUSTER_SIZE]) # node1 should have quorum now and accept requests - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks.append(get_fake_zk(nodes[CLUSTER_SIZE].name)) @@ -206,7 +178,7 @@ def test_cluster_recovery(started_cluster): f"/etc/clickhouse-server/config.d/enable_keeper{i+1}.xml", ) node.start_clickhouse() - wait_until_connected(node.name) + keeper_utils.wait_until_connected(cluster, node) node_zks.append(get_fake_zk(node.name)) # refresh old zk sessions @@ -223,7 +195,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zks[-1], "/test_force_recovery_last", "somedatalast") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) for zk in node_zks[:nodes_left]: assert_all_data(zk) diff --git a/tests/integration/test_keeper_force_recovery_single_node/test.py b/tests/integration/test_keeper_force_recovery_single_node/test.py index 0a554e33119..1c0d5e9a306 100644 --- a/tests/integration/test_keeper_force_recovery_single_node/test.py +++ b/tests/integration/test_keeper_force_recovery_single_node/test.py @@ -2,10 +2,11 @@ import os import pytest import socket from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooRetry CLUSTER_SIZE = 3 @@ -45,47 +46,19 @@ def started_cluster(): def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( - hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout + 
hosts=cluster.get_instance_ip(nodename) + ":9181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=10), + command_retry=KazooRetry(max_tries=10), ) _fake_zk_instance.start() return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def wait_until_connected(node_name): - while send_4lw_cmd(node_name, "mntr") == NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.1) - - -def wait_nodes(nodes): - for node in nodes: - wait_until_connected(node.name) - - def wait_and_assert_data(zk, path, data): - while zk.exists(path) is None: + while zk.retry(zk.exists, path) is None: time.sleep(0.1) - assert zk.get(path)[0] == data.encode() + assert zk.retry(zk.get, path)[0] == data.encode() def close_zk(zk): @@ -93,20 +66,17 @@ def close_zk(zk): zk.close() -NOT_SERVING_REQUESTS_ERROR_MSG = "This instance is not currently serving requests" - - def test_cluster_recovery(started_cluster): node_zks = [] try: - wait_nodes(nodes) + keeper_utils.wait_nodes(cluster, nodes) node_zks = [get_fake_zk(node.name) for node in nodes] data_in_cluster = [] def add_data(zk, path, data): - zk.create(path, data.encode()) + zk.retry(zk.create, path, data.encode()) data_in_cluster.append((path, data)) def assert_all_data(zk): @@ -137,7 +107,7 @@ def test_cluster_recovery(started_cluster): wait_and_assert_data(node_zk, "/test_force_recovery_extra", "somedataextra") nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) node_zks[0] = get_fake_zk(nodes[0].name) wait_and_assert_data(node_zks[0], "/test_force_recovery_extra", "somedataextra") @@ -156,7 +126,7 @@ def test_cluster_recovery(started_cluster): ) nodes[0].start_clickhouse() - wait_until_connected(nodes[0].name) + keeper_utils.wait_until_connected(cluster, nodes[0]) assert_all_data(get_fake_zk(nodes[0].name)) finally: diff --git a/tests/integration/test_keeper_four_word_command/test.py b/tests/integration/test_keeper_four_word_command/test.py index e8136d322d3..30abc7422c4 100644 --- a/tests/integration/test_keeper_four_word_command/test.py +++ b/tests/integration/test_keeper_four_word_command/test.py @@ -1,6 +1,7 @@ import socket import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -25,6 +26,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -56,28 +61,6 @@ def clear_znodes(): destroy_zk_client(zk) -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - # zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = 
KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -86,23 +69,15 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def reset_node_stats(node_name=node1.name): +def reset_node_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"srst") client.recv(10) finally: @@ -110,23 +85,10 @@ def reset_node_stats(node_name=node1.name): client.close() -def send_4lw_cmd(node_name=node1.name, cmd="ruok"): +def reset_conn_stats(node=node1): client = None try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - -def reset_conn_stats(node_name=node1.name): - client = None - try: - client = get_keeper_socket(node_name) + client = keeper_utils.get_keeper_socket(cluster, node) client.send(b"crst") client.recv(10_000) finally: @@ -138,7 +100,7 @@ def test_cmd_ruok(started_cluster): client = None try: wait_nodes() - data = send_4lw_cmd(cmd="ruok") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") assert data == "imok" finally: close_keeper_socket(client) @@ -187,7 +149,7 @@ def test_cmd_mntr(started_cluster): clear_znodes() # reset stat first - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action( @@ -200,7 +162,7 @@ def test_cmd_mntr(started_cluster): delete_cnt=2, ) - data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") # print(data.decode()) reader = csv.reader(data.split("\n"), delimiter="\t") @@ -252,10 +214,10 @@ def test_cmd_srst(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="srst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srst") assert data.strip() == "Server stats reset." 
- data = send_4lw_cmd(cmd="mntr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="mntr") assert len(data) != 0 # print(data) @@ -279,7 +241,7 @@ def test_cmd_conf(started_cluster): wait_nodes() clear_znodes() - data = send_4lw_cmd(cmd="conf") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="conf") reader = csv.reader(data.split("\n"), delimiter="=") result = {} @@ -335,8 +297,8 @@ def test_cmd_conf(started_cluster): def test_cmd_isro(started_cluster): wait_nodes() - assert send_4lw_cmd(node1.name, "isro") == "rw" - assert send_4lw_cmd(node2.name, "isro") == "ro" + assert keeper_utils.send_4lw_cmd(cluster, node1, "isro") == "rw" + assert keeper_utils.send_4lw_cmd(cluster, node2, "isro") == "ro" def test_cmd_srvr(started_cluster): @@ -345,12 +307,12 @@ def test_cmd_srvr(started_cluster): wait_nodes() clear_znodes() - reset_node_stats(node1.name) + reset_node_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="srvr") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="srvr") print("srvr output -------------------------------------") print(data) @@ -380,13 +342,13 @@ def test_cmd_stat(started_cluster): try: wait_nodes() clear_znodes() - reset_node_stats(node1.name) - reset_conn_stats(node1.name) + reset_node_stats(node1) + reset_conn_stats(node1) zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="stat") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="stat") print("stat output -------------------------------------") print(data) @@ -440,7 +402,7 @@ def test_cmd_cons(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output -------------------------------------") print(data) @@ -485,12 +447,12 @@ def test_cmd_crst(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=10) - data = send_4lw_cmd(cmd="crst") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="crst") print("crst output -------------------------------------") print(data) - data = send_4lw_cmd(cmd="cons") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="cons") print("cons output(after crst) -------------------------------------") print(data) @@ -537,7 +499,7 @@ def test_cmd_dump(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, ephemeral_cnt=2) - data = send_4lw_cmd(cmd="dump") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="dump") print("dump output -------------------------------------") print(data) @@ -563,7 +525,7 @@ def test_cmd_wchs(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchs") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchs") print("wchs output -------------------------------------") print(data) @@ -598,7 +560,7 @@ def test_cmd_wchc(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchc") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchc") print("wchc output -------------------------------------") print(data) @@ -622,7 +584,7 @@ def test_cmd_wchp(started_cluster): zk = get_fake_zk(node1.name, timeout=30.0) do_some_action(zk, create_cnt=2, watch_cnt=2) - data = send_4lw_cmd(cmd="wchp") + data = keeper_utils.send_4lw_cmd(cluster, node1, cmd="wchp") 
print("wchp output -------------------------------------") print(data) diff --git a/tests/integration/test_keeper_incorrect_config/test.py b/tests/integration/test_keeper_incorrect_config/test.py index cedb195a6e0..95482745b31 100644 --- a/tests/integration/test_keeper_incorrect_config/test.py +++ b/tests/integration/test_keeper_incorrect_config/test.py @@ -204,7 +204,7 @@ JUST_WRONG_CONFIG = """ """ -def test_duplicate_endpoint(started_cluster): +def test_invalid_configs(started_cluster): node1.stop_clickhouse() def assert_config_fails(config): diff --git a/tests/integration/test_keeper_mntr_pressure/test.py b/tests/integration/test_keeper_mntr_pressure/test.py index 471767210d6..d351b238ead 100644 --- a/tests/integration/test_keeper_mntr_pressure/test.py +++ b/tests/integration/test_keeper_mntr_pressure/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import pytest import random import string @@ -37,40 +38,22 @@ def started_cluster(): cluster.shutdown() -def get_keeper_socket(node_name): - hosts = cluster.get_instance_ip(node_name) - client = socket.socket() - client.settimeout(10) - client.connect((hosts, 9181)) - return client - - def close_keeper_socket(cli): if cli is not None: cli.close() -def send_4lw_cmd(node_name, cmd="ruok"): - client = None - try: - client = get_keeper_socket(node_name) - client.send(cmd.encode()) - data = client.recv(100_000) - data = data.decode() - return data - finally: - if client is not None: - client.close() - - def test_aggressive_mntr(started_cluster): - def go_mntr(node_name): - for _ in range(100000): - print(node_name, send_4lw_cmd(node_name, "mntr")) + def go_mntr(node): + for _ in range(10000): + try: + print(node.name, keeper_utils.send_4lw_cmd(cluster, node, "mntr")) + except ConnectionRefusedError: + pass - node1_thread = threading.Thread(target=lambda: go_mntr(node1.name)) - node2_thread = threading.Thread(target=lambda: go_mntr(node2.name)) - node3_thread = threading.Thread(target=lambda: go_mntr(node3.name)) + node1_thread = threading.Thread(target=lambda: go_mntr(node1)) + node2_thread = threading.Thread(target=lambda: go_mntr(node2)) + node3_thread = threading.Thread(target=lambda: go_mntr(node3)) node1_thread.start() node2_thread.start() node3_thread.start() @@ -78,8 +61,7 @@ def test_aggressive_mntr(started_cluster): node2.stop_clickhouse() node3.stop_clickhouse() - while send_4lw_cmd(node1.name, "mntr") != NOT_SERVING_REQUESTS_ERROR_MSG: - time.sleep(0.2) + keeper_utils.wait_until_quorum_lost(cluster, node1) node1.stop_clickhouse() starters = [] diff --git a/tests/integration/test_keeper_multinode_blocade_leader/test.py b/tests/integration/test_keeper_multinode_blocade_leader/test.py index d6d01a5d0a6..a7a80d90a58 100644 --- a/tests/integration/test_keeper_multinode_blocade_leader/test.py +++ b/tests/integration/test_keeper_multinode_blocade_leader/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -55,31 +56,6 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, 
exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - -def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) - - def get_fake_zk(nodename, timeout=30.0): _fake_zk_instance = KazooClient( hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout @@ -88,6 +64,10 @@ def get_fake_zk(nodename, timeout=30.0): return _fake_zk_instance +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + # in extremely rare case it can take more than 5 minutes in debug build with sanitizer @pytest.mark.timeout(600) def test_blocade_leader(started_cluster): diff --git a/tests/integration/test_keeper_multinode_simple/test.py b/tests/integration/test_keeper_multinode_simple/test.py index 694600acc67..1dcbb290fa8 100644 --- a/tests/integration/test_keeper_multinode_simple/test.py +++ b/tests/integration/test_keeper_multinode_simple/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -43,29 +44,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_nodes_add/test.py b/tests/integration/test_keeper_nodes_add/test.py index c3449534e87..aad674332ac 100644 --- a/tests/integration/test_keeper_nodes_add/test.py +++ b/tests/integration/test_keeper_nodes_add/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -41,9 +42,11 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def test_nodes_add(started_cluster): + keeper_utils.wait_until_connected(cluster, node1) zk_conn = get_fake_zk(node1) for i in range(100): @@ -62,6 +65,7 @@ def test_nodes_add(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node2) zk_conn2 = get_fake_zk(node2) @@ -93,6 +97,7 @@ def test_nodes_add(started_cluster): node2.query("SYSTEM RELOAD CONFIG") waiter.wait() + keeper_utils.wait_until_connected(cluster, node3) zk_conn3 = get_fake_zk(node3) for i in range(100): diff --git a/tests/integration/test_keeper_nodes_move/test.py b/tests/integration/test_keeper_nodes_move/test.py index 31082846fb8..1e3bd95c5e7 100644 --- a/tests/integration/test_keeper_nodes_move/test.py +++ b/tests/integration/test_keeper_nodes_move/test.py @@ -11,6 +11,7 @@ import os import time from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState cluster = ClickHouseCluster(__file__) @@ -41,6 +42,7 @@ def started_cluster(): def start(node): 
node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def get_fake_zk(node, timeout=30.0): diff --git a/tests/integration/test_keeper_nodes_remove/test.py b/tests/integration/test_keeper_nodes_remove/test.py index 13303d320eb..59bdaadf2e2 100644 --- a/tests/integration/test_keeper_nodes_remove/test.py +++ b/tests/integration/test_keeper_nodes_remove/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import time import os from kazoo.client import KazooClient, KazooState @@ -79,9 +80,12 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn3 = get_fake_zk(node3) zk_conn3.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node3.stop_clickhouse() @@ -91,6 +95,7 @@ def test_nodes_remove(started_cluster): ) node1.query("SYSTEM RELOAD CONFIG") + zk_conn = get_fake_zk(node1) zk_conn.sync("/test_two_0") @@ -98,8 +103,11 @@ def test_nodes_remove(started_cluster): assert zk_conn.exists("test_two_" + str(i)) is not None assert zk_conn.exists("test_two_" + str(100 + i)) is not None - with pytest.raises(Exception): + try: zk_conn2 = get_fake_zk(node2) zk_conn2.sync("/test_two_0") + time.sleep(0.1) + except Exception: + pass node2.stop_clickhouse() diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 377fa436a87..70cc14fe26d 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -46,6 +46,10 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -62,7 +66,7 @@ def test_state_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -111,7 +115,7 @@ def test_state_duplicate_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_state_duplicated_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -119,7 +123,7 @@ def test_state_duplicate_restart(started_cluster): node_zk2.create("/test_state_duplicated_restart/just_test2") node_zk2.create("/test_state_duplicated_restart/just_test3") - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk3 = get_connection_zk("node") @@ -159,6 +163,7 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): + try: node_zk = None node_zk2 = None @@ -176,7 +181,7 @@ def test_ephemeral_after_restart(started_cluster): if i % 7 == 0: node_zk.delete("/test_ephemeral_after_restart/node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_persistent_log_multinode/test.py b/tests/integration/test_keeper_persistent_log_multinode/test.py index f15e772fd5f..1552abd32e9 100644 --- a/tests/integration/test_keeper_persistent_log_multinode/test.py +++ b/tests/integration/test_keeper_persistent_log_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster 
import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -26,10 +27,15 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() + wait_nodes() yield cluster @@ -100,6 +106,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml deleted file mode 100644 index d51e420f733..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_1.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml deleted file mode 100644 index 3f1ee1e01a8..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_2.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml b/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml deleted file mode 
100644 index a99bd5d5296..00000000000 --- a/tests/integration/test_keeper_remove_leader/configs/enable_keeper_two_nodes_3.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 2 - node2 - 9234 - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_restore_from_snapshot/test.py b/tests/integration/test_keeper_restore_from_snapshot/test.py index 7270c84bdda..bc33689dd20 100644 --- a/tests/integration/test_keeper_restore_from_snapshot/test.py +++ b/tests/integration/test_keeper_restore_from_snapshot/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -84,6 +85,7 @@ def test_recover_from_snapshot(started_cluster): # stale node should recover from leader's snapshot # with some sanitizers can start longer than 5 seconds node3.start_clickhouse(20) + keeper_utils.wait_until_connected(cluster, node3) print("Restarted") try: diff --git a/tests/integration/test_keeper_secure_client/test.py b/tests/integration/test_keeper_secure_client/test.py index 55e00880da0..2a17afac75b 100644 --- a/tests/integration/test_keeper_secure_client/test.py +++ b/tests/integration/test_keeper_secure_client/test.py @@ -40,4 +40,4 @@ def started_cluster(): def test_connection(started_cluster): # just nothrow - node2.query("SELECT * FROM system.zookeeper WHERE path = '/'") + node2.query_with_retry("SELECT * FROM system.zookeeper WHERE path = '/'") diff --git a/tests/integration/test_keeper_session/test.py b/tests/integration/test_keeper_session/test.py index 4b3aa7e3fdf..72a162c1765 100644 --- a/tests/integration/test_keeper_session/test.py +++ b/tests/integration/test_keeper_session/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import time import socket import struct @@ -52,25 +53,8 @@ def destroy_zk_client(zk): pass -def wait_node(node): - for _ in range(100): - zk = None - try: - zk = get_fake_zk(node.name, timeout=30.0) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - destroy_zk_client(zk) - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for n in [node1, node2, node3]: - wait_node(n) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_snapshot_small_distance/test.py b/tests/integration/test_keeper_snapshot_small_distance/test.py index 4351c5ac96f..6a64cf0ac92 100644 --- a/tests/integration/test_keeper_snapshot_small_distance/test.py +++ b/tests/integration/test_keeper_snapshot_small_distance/test.py @@ -2,6 +2,7 @@ ##!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from multiprocessing.dummy import Pool from kazoo.client import KazooClient, KazooState import random @@ -22,7 +23,7 @@ node3 = cluster.add_instance( def start_zookeeper(node): - node1.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) + node.exec_in_container(["bash", "-c", "/opt/zookeeper/bin/zkServer.sh start"]) def stop_zookeeper(node): @@ -66,6 +67,7 @@ def stop_clickhouse(node): def start_clickhouse(node): node.start_clickhouse() + 
keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots, node): diff --git a/tests/integration/test_keeper_snapshots/test.py b/tests/integration/test_keeper_snapshots/test.py index 08f60e538a4..ce57a852dca 100644 --- a/tests/integration/test_keeper_snapshots/test.py +++ b/tests/integration/test_keeper_snapshots/test.py @@ -3,6 +3,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -50,6 +51,11 @@ def get_connection_zk(nodename, timeout=30.0): return _fake_zk_instance +def restart_clickhouse(): + node.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node) + + def test_state_after_restart(started_cluster): try: node_zk = None @@ -69,7 +75,7 @@ def test_state_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") @@ -123,7 +129,7 @@ def test_ephemeral_after_restart(started_cluster): else: existing_children.append("node" + str(i)) - node.restart_clickhouse(kill=True) + restart_clickhouse() node_zk2 = get_connection_zk("node") diff --git a/tests/integration/test_keeper_snapshots_multinode/test.py b/tests/integration/test_keeper_snapshots_multinode/test.py index 1461f35e6a4..a68a34dae2e 100644 --- a/tests/integration/test_keeper_snapshots_multinode/test.py +++ b/tests/integration/test_keeper_snapshots_multinode/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -20,6 +21,10 @@ node3 = cluster.add_instance( from kazoo.client import KazooClient, KazooState +def wait_nodes(): + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) + + @pytest.fixture(scope="module") def started_cluster(): try: @@ -94,6 +99,8 @@ def test_restart_multinode(started_cluster): node1.restart_clickhouse(kill=True) node2.restart_clickhouse(kill=True) node3.restart_clickhouse(kill=True) + wait_nodes() + for i in range(100): try: node1_zk = get_fake_zk("node1") diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml deleted file mode 100644 index 1e57d42016d..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper1.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 1 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml deleted file mode 100644 index 98422b41c9b..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper2.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 2 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml 
b/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml deleted file mode 100644 index 43800bd2dfb..00000000000 --- a/tests/integration/test_keeper_start_as_follower_multinode/configs/enable_keeper3.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - 9181 - 3 - /var/lib/clickhouse/coordination/log - /var/lib/clickhouse/coordination/snapshots - - - 5000 - 10000 - trace - - - - - 1 - node1 - 9234 - - - 2 - node2 - 9234 - true - - - 3 - node3 - 9234 - true - - - - diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py index f1de469c5a1..bd29ded357f 100644 --- a/tests/integration/test_keeper_three_nodes_two_alive/test.py +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -48,6 +49,7 @@ def started_cluster(): def start(node): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def delete_with_retry(node_name, path): @@ -138,6 +140,7 @@ def test_restart_third_node(started_cluster): node1_zk.create("/test_restart", b"aaaa") node3.restart_clickhouse() + keeper_utils.wait_until_connected(cluster, node3) assert node3.contains_in_log( "Connected to ZooKeeper (or Keeper) before internal Keeper start" diff --git a/tests/integration/test_keeper_two_nodes_cluster/test.py b/tests/integration/test_keeper_two_nodes_cluster/test.py index 8c0276f7d77..c6bc0ebd33a 100644 --- a/tests/integration/test_keeper_two_nodes_cluster/test.py +++ b/tests/integration/test_keeper_two_nodes_cluster/test.py @@ -2,6 +2,7 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -40,29 +41,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2]) def get_fake_zk(nodename, timeout=30.0): diff --git a/tests/integration/test_keeper_znode_time/test.py b/tests/integration/test_keeper_znode_time/test.py index bff3d52014e..f2076acc4d2 100644 --- a/tests/integration/test_keeper_znode_time/test.py +++ b/tests/integration/test_keeper_znode_time/test.py @@ -1,5 +1,6 @@ import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils import random import string import os @@ -42,29 +43,8 @@ def smaller_exception(ex): return "\n".join(str(ex).split("\n")[0:2]) -def wait_node(node): - for _ in range(100): - zk = None - try: - node.query("SELECT * FROM system.zookeeper WHERE path = '/'") - zk = get_fake_zk(node.name, timeout=30.0) - zk.create("/test", sequence=True) - print("node", node.name, "ready") - break - except Exception as ex: - time.sleep(0.2) - print("Waiting until", node.name, "will be ready, exception", ex) - finally: - if zk: - zk.stop() - zk.close() - else: - raise 
Exception("Can't wait node", node.name, "to become ready") - - def wait_nodes(): - for node in [node1, node2, node3]: - wait_node(node) + keeper_utils.wait_nodes(cluster, [node1, node2, node3]) def get_fake_zk(nodename, timeout=30.0): @@ -129,6 +109,7 @@ def test_server_restart(started_cluster): node1_zk.set("/test_server_restart/" + str(child_node), b"somevalue") node3.restart_clickhouse(kill=True) + keeper_utils.wait_until_connected(cluster, node3) node2_zk = get_fake_zk("node2") node3_zk = get_fake_zk("node3") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 50a9ee6a4a7..af8d1ca4bf9 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import pytest from helpers.cluster import ClickHouseCluster +import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient, KazooState from kazoo.security import ACL, make_digest_acl, make_acl from kazoo.exceptions import ( @@ -60,6 +61,7 @@ def stop_clickhouse(): def start_clickhouse(): node.start_clickhouse() + keeper_utils.wait_until_connected(cluster, node) def copy_zookeeper_data(make_zk_snapshots): diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 4df47ec036b..bc755220c2f 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -4,9 +4,6 @@ import os import pytest -# FIXME Test is temporarily disabled due to flakyness -# https://github.com/ClickHouse/ClickHouse/issues/39700 - pytestmark = pytest.mark.skip from helpers.cluster import ClickHouseCluster diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.reference b/tests/queries/0_stateless/00984_parser_stack_overflow.reference index a46c80e9233..0cf6a1f96df 100644 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.reference +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.reference @@ -1,6 +1,4 @@ exceeded exceeded -exceeded -exceeded 20002 1 diff --git a/tests/queries/0_stateless/00984_parser_stack_overflow.sh b/tests/queries/0_stateless/00984_parser_stack_overflow.sh index 329e51e774a..a7854b91ee2 100755 --- a/tests/queries/0_stateless/00984_parser_stack_overflow.sh +++ b/tests/queries/0_stateless/00984_parser_stack_overflow.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-asan # Such a huge timeout mostly for debug build. CLICKHOUSE_CURL_TIMEOUT=60 diff --git a/tests/queries/0_stateless/01062_max_parser_depth.reference b/tests/queries/0_stateless/01062_max_parser_depth.reference index 3efc4f06710..fc9c24676eb 100644 --- a/tests/queries/0_stateless/01062_max_parser_depth.reference +++ b/tests/queries/0_stateless/01062_max_parser_depth.reference @@ -1,4 +1,3 @@ - -Maximum parse depth (42) exceeded. - -Maximum parse depth (20) exceeded. +- diff --git a/tests/queries/0_stateless/01062_max_parser_depth.sh b/tests/queries/0_stateless/01062_max_parser_depth.sh index 27593272f92..baebd7becd8 100755 --- a/tests/queries/0_stateless/01062_max_parser_depth.sh +++ b/tests/queries/0_stateless/01062_max_parser_depth.sh @@ -9,3 +9,4 @@ echo - echo 'select (1+1)*(2+1)' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=42" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded." 
 echo -
 echo 'select 1' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&max_parser_depth=20" -d @- 2>&1 | grep -oP "Maximum parse depth .* exceeded."
+echo -
diff --git a/tests/queries/0_stateless/01196_max_parser_depth.reference b/tests/queries/0_stateless/01196_max_parser_depth.reference
index 072fc270acd..d2222a8b895 100644
--- a/tests/queries/0_stateless/01196_max_parser_depth.reference
+++ b/tests/queries/0_stateless/01196_max_parser_depth.reference
@@ -1,4 +1,5 @@
-Code: 306
-Code: 306
-Code: 306
-Code: 306
+Code: 167
+Code: 167
+Code: 167
+Code: 167
+Code: 167
diff --git a/tests/queries/0_stateless/01196_max_parser_depth.sh b/tests/queries/0_stateless/01196_max_parser_depth.sh
index ae4851bf0c3..57283feb7f0 100755
--- a/tests/queries/0_stateless/01196_max_parser_depth.sh
+++ b/tests/queries/0_stateless/01196_max_parser_depth.sh
@@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

 { printf "select "; for _ in {1..1000}; do printf "coalesce(null, "; done; printf "1"; for _ in {1..1000}; do printf ")"; done; } > "${CLICKHOUSE_TMP}"/query

-cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 306'
-cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 306'
-cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CURL --data-binary @- -vsS "$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 306'
+cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CLIENT 2>&1 | grep -o -F 'Code: 167'
+cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_LOCAL 2>&1 | grep -o -F 'Code: 167'
+cat "${CLICKHOUSE_TMP}"/query | $CLICKHOUSE_CURL --data-binary @- -vsS "$CLICKHOUSE_URL" 2>&1 | grep -o -F 'Code: 167'
diff --git a/tests/queries/0_stateless/02354_annoy.sql b/tests/queries/0_stateless/02354_annoy.sql
index d25b7333a89..8a8d023a104 100644
--- a/tests/queries/0_stateless/02354_annoy.sql
+++ b/tests/queries/0_stateless/02354_annoy.sql
@@ -1,4 +1,4 @@
--- Tags: no-fasttest, no-ubsan, no-cpu-aarch64
+-- Tags: no-fasttest, no-ubsan, no-cpu-aarch64, no-backward-compatibility-check

 SET allow_experimental_annoy_index = 1;
diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference
new file mode 100644
index 00000000000..9c440ab4c67
--- /dev/null
+++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.reference
@@ -0,0 +1,15 @@
+===ddl_format_version 3====
+1
+1
+1
+1
+===ddl_format_version 4====
+1
+1
+1
+1
+===exception====
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh
new file mode 100755
index 00000000000..6164ff97d9f
--- /dev/null
+++ b/tests/queries/0_stateless/02423_ddl_for_opentelemetry.sh
@@ -0,0 +1,159 @@
+#!/usr/bin/env bash
+# Tags: zookeeper

+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh

+# The test cases in this file cover DDLs running on both the Replicated and non-Replicated database engines.
+# Since the two processing flows differ slightly, and in order to share the same reference file,
+# we compare the expected and actual results ourselves. See the check_span function below for more detail.
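+#
+# Note: execute_query below propagates the trace id via a W3C Trace Context
+# "traceparent" header (format: version-trace_id-parent_span_id-flags), so every
+# span produced by the query can later be looked up by trace id in
+# system.opentelemetry_span_log.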
+
+# This function takes the following arguments:
+# $1 - OpenTelemetry Trace Id
+# $2 - Query
+# $3 - Query Settings
+function execute_query()
+{
+    # Some queries are supposed to fail, so suppress curl's own error output
+    echo $2 | ${CLICKHOUSE_CURL_COMMAND} -q -s --max-time 180 \
+        -X POST \
+        -H "traceparent: 00-$1-5150000000000515-01" \
+        -H "tracestate: a\nb cd" \
+        "${CLICKHOUSE_URL}&${3}" \
+        --data @-
+}
+
+# This function takes the following arguments:
+# $1 - expected
+# $2 - OpenTelemetry Trace Id
+# $3 - operation_name pattern
+# $4 - extra condition
+function check_span()
+{
+    if [ -n "$4" ]; then
+        extra_condition=" AND ${4}"
+    else
+        extra_condition=""
+    fi
+
+    ret=$(${CLICKHOUSE_CLIENT} -nq "
+        SYSTEM FLUSH LOGS;
+
+        SELECT count()
+        FROM system.opentelemetry_span_log
+        WHERE finish_date >= yesterday()
+        AND lower(hex(trace_id)) = '${2}'
+        AND operation_name like '${3}'
+        ${extra_condition};")
+
+    if [ "$ret" = "$1" ]; then
+        echo 1
+    else
+        echo "[operation_name like '${3}' ${extra_condition}]=$ret, expected: ${1}"
+
+        # echo the span logs to help analyze
+        ${CLICKHOUSE_CLIENT} -q "
+            SELECT operation_name, attribute
+            FROM system.opentelemetry_span_log
+            WHERE finish_date >= yesterday()
+            AND lower(hex(trace_id)) ='${2}'
+            ORDER BY start_time_us
+            Format PrettyCompact
+        "
+    fi
+}
+
+#
+# Set up
+#
+${CLICKHOUSE_CLIENT} -q "
+DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry;
+"
+
+# Support the Replicated database engine
+cluster_name=$($CLICKHOUSE_CLIENT -q "select if(engine = 'Replicated', name, 'test_shard_localhost') from system.databases where name='$CLICKHOUSE_DATABASE'")
+
+#
+# Only format_version 4 enables the tracing
+#
+for ddl_version in 3 4; do
+    # Echo a separator so that the reference file is clearer to read
+    echo "===ddl_format_version ${ddl_version}===="
+
+    trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
+    execute_query $trace_id "CREATE TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry ON CLUSTER ${cluster_name} (id UInt64) Engine=MergeTree ORDER BY id" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=${ddl_version}"
+
+    check_span 1 $trace_id "HTTPHandler"
+
+    if [ "$cluster_name" = "test_shard_localhost" ]; then
+        check_span 1 $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
+    else
+        check_span 1 $trace_id "%tryEnqueueAndExecuteEntry%" "attribute['clickhouse.cluster']='${cluster_name}'"
+    fi
+
+    if [ "$cluster_name" = "test_shard_localhost" ]; then
+        # The tracing is only enabled when the entry format version is 4
+        if [ "$ddl_version" = "4" ]; then
+            expected=1
+        else
+            expected=0
+        fi
+    else
+        # For the Replicated database engine, tracing is always enabled because it calls DDLWorker::processTask directly
+        expected=1
+    fi
+    check_span $expected $trace_id "%DDLWorker::processTask%"
+
+    # For queries where tracing is enabled (format version 4 or the Replicated database engine), there should be two 'query' spans:
+    # one for the HTTPHandler, the other for the DDL executed in DDLWorker.
+    #
+    # For other formats, there should be only one 'query' span
+    if [ "$cluster_name" = "test_shard_localhost" ]; then
+        if [ "$ddl_version" = "4" ]; then
+            expected=2
+        else
+            expected=1
+        fi
+    else
+        expected=2
+    fi
+    check_span $expected $trace_id "query"
+
+    # Remove the table
+    # Under the Replicated database engine, the DDL is executed as an ON CLUSTER DDL, so distributed_ddl_output_mode is needed to suppress the output
+    ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode none -q "
+    DROP TABLE IF EXISTS ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry;
+    "
+done
+
+#
+# An exceptional case: DROP a non-existent table
+#
+# Echo a separator so that the reference file is clearer to read
+echo "===exception===="
+
+trace_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(generateUUIDv4()))");
+execute_query $trace_id "DROP TABLE ${CLICKHOUSE_DATABASE}.ddl_test_for_opentelemetry_non_exist ON CLUSTER ${cluster_name}" "distributed_ddl_output_mode=none&distributed_ddl_entry_format_version=4" 2>&1 | grep -Fv "UNKNOWN_TABLE"
+
+check_span 1 $trace_id "HTTPHandler"
+
+if [ "$cluster_name" = "test_shard_localhost" ]; then
+    expected=1
+else
+    # For the Replicated database engine, it fails on the initiator before enqueueing the distributed DDL
+    expected=0
+fi
+check_span $expected $trace_id "%executeDDLQueryOnCluster%" "attribute['clickhouse.cluster']='${cluster_name}'"
+check_span $expected $trace_id "%DDLWorker::processTask%"
+
+if [ "$cluster_name" = "test_shard_localhost" ]; then
+    # There should be two 'query' spans: one for the HTTPHandler, the other for the DDL executed in DDLWorker.
+    # Both of these spans contain the exception
+    expected=2
+else
+    # For the Replicated database engine, there should be only one 'query' span
+    expected=1
+fi
+# We don't care about the exact value of exception_code, just check that it's there.
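+# (attribute is a Map column; looking up a missing key returns the empty-string
+# default, so the <>'' predicate below only matches spans that actually carry an
+# exception code)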
+check_span $expected $trace_id "query" "attribute['clickhouse.exception_code']<>''" diff --git a/tests/queries/0_stateless/02452_json_utf8_validation.reference b/tests/queries/0_stateless/02452_json_utf8_validation.reference new file mode 100644 index 00000000000..c7155832e1e --- /dev/null +++ b/tests/queries/0_stateless/02452_json_utf8_validation.reference @@ -0,0 +1,174 @@ +JSONCompact +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + ["� �"] + ], + + "rows": 1 +} +JSON +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + { + "s": "� �" + } + ], + + "rows": 1 +} +XML + + + + + + s + String + + + + + + � � + + + 1 + +JSONColumnsWithMetadata +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + { + "s": ["� �"] + }, + + "rows": 1 +} +JSONEachRow +{"s":"� �"} +JSONCompactEachRow +["� �"] +JSONColumns +{ + "s": ["� �"] +} +JSONCompactColumns +[ + ["� �"] +] +JSONObjectEachRow +{ + "row_1": {"s":"� �"} +} +JSONCompact +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + ["� �"] + ], + + "rows": 1 +} +JSON +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + [ + { + "s": "� �" + } + ], + + "rows": 1 +} +XML + + + + + + s + String + + + + + + � � + + + 1 + +JSONColumnsWithMetadata +{ + "meta": + [ + { + "name": "s", + "type": "String" + } + ], + + "data": + { + "s": ["� �"] + }, + + "rows": 1 +} +JSONEachRow +{"s":" "} +JSONCompactEachRow +[" "] +JSONColumns +{ + "s": [" "] +} +JSONCompactColumns +[ + [" "] +] +JSONObjectEachRow +{ + "row_1": {"s":" "} +} diff --git a/tests/queries/0_stateless/02452_json_utf8_validation.sql b/tests/queries/0_stateless/02452_json_utf8_validation.sql new file mode 100644 index 00000000000..e0ddbcdc919 --- /dev/null +++ b/tests/queries/0_stateless/02452_json_utf8_validation.sql @@ -0,0 +1,42 @@ +SET output_format_write_statistics = 0; +SET output_format_json_validate_utf8 = 1; +SELECT 'JSONCompact'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompact; +SELECT 'JSON'; +SELECT '\xED\x20\xA8' AS s FORMAT JSON; +SELECT 'XML'; +SELECT '\xED\x20\xA8' AS s FORMAT XML; +SELECT 'JSONColumnsWithMetadata'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumnsWithMetadata; +SELECT 'JSONEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONEachRow; +SELECT 'JSONCompactEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactEachRow; +SELECT 'JSONColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumns; +SELECT 'JSONCompactColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactColumns; +SELECT 'JSONObjectEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONObjectEachRow; + +SET output_format_json_validate_utf8 = 0; +SELECT 'JSONCompact'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompact; +SELECT 'JSON'; +SELECT '\xED\x20\xA8' AS s FORMAT JSON; +SELECT 'XML'; +SELECT '\xED\x20\xA8' AS s FORMAT XML; +SELECT 'JSONColumnsWithMetadata'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumnsWithMetadata; +SELECT 'JSONEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONEachRow; +SELECT 'JSONCompactEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactEachRow; +SELECT 'JSONColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONColumns; +SELECT 'JSONCompactColumns'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONCompactColumns; +SELECT 'JSONObjectEachRow'; +SELECT '\xED\x20\xA8' AS s FORMAT JSONObjectEachRow; + + diff --git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference 
b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql new file mode 100644 index 00000000000..999210ef36e --- /dev/null +++ b/tests/queries/0_stateless/02454_disable_mergetree_with_lightweight_delete_column.sql @@ -0,0 +1,21 @@ +drop table if exists t_row_exists; + +create table t_row_exists(a int, _row_exists int) engine=MergeTree order by a; --{serverError 44} + +create table t_row_exists(a int, b int) engine=MergeTree order by a; +alter table t_row_exists add column _row_exists int; --{serverError ILLEGAL_COLUMN} +alter table t_row_exists rename column b to _row_exists; --{serverError ILLEGAL_COLUMN} +alter table t_row_exists rename column _row_exists to c; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} +alter table t_row_exists drop column _row_exists; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} +alter table t_row_exists drop column unknown_column; --{serverError NOT_FOUND_COLUMN_IN_BLOCK} +drop table t_row_exists; + +create table t_row_exists(a int, _row_exists int) engine=Memory; +insert into t_row_exists values(1,1); +select * from t_row_exists; +drop table t_row_exists; + +create table t_row_exists(a int, b int) engine=Memory; +alter table t_row_exists add column _row_exists int; --{serverError NOT_IMPLEMENTED} +alter table t_row_exists rename column b to _row_exists; --{serverError NOT_IMPLEMENTED} +drop table t_row_exists;
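For orientation, here is a minimal usage sketch of the new helpers/keeper_utils.py module introduced in this diff, assembled from the call sites above; the cluster fixture and the node objects are stand-ins for whatever a concrete integration test module defines:

import helpers.keeper_utils as keeper_utils

def test_example(started_cluster):
    # Block until every Keeper node answers "mntr" normally.
    keeper_utils.wait_nodes(cluster, [node1, node2, node3])

    # Four-letter-word commands go over a raw TCP socket (port 9181 by default).
    assert keeper_utils.send_4lw_cmd(cluster, node1, cmd="ruok") == "imok"

    # Once a majority of nodes is stopped, the survivor stops serving requests.
    node2.stop_clickhouse()
    node3.stop_clickhouse()
    keeper_utils.wait_until_quorum_lost(cluster, node1)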