diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 00000000000..7f20206a7b3 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,53 @@ +name: Ligthweight GithubActions +on: # yamllint disable-line rule:truthy + pull_request: + types: + - labeled + - unlabeled + - synchronize + - reopened + - opened + branches: + - master +jobs: + CheckLabels: + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Labels check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 run_check.py + DockerHubPush: + needs: CheckLabels + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Images check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 docker_images_check.py + - name: Upload images files to artifacts + uses: actions/upload-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/docker_images_check/changed_images.json + StyleCheck: + needs: DockerHubPush + runs-on: [self-hosted] + steps: + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ runner.temp }}/style_check + - name: Check out repository code + uses: actions/checkout@v2 + - name: Style Check + run: cd $GITHUB_WORKSPACE/tests/ci && python3 style_check.py + FinishCheck: + needs: [StyleCheck, DockerHubPush, CheckLabels] + runs-on: [self-hosted] + steps: + - name: Check out repository code + uses: actions/checkout@v2 + - name: Finish label + run: cd $GITHUB_WORKSPACE/tests/ci && python3 finish_check.py diff --git a/.gitmodules b/.gitmodules index 696676200fe..e0404c1269d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -140,7 +140,7 @@ url = https://github.com/ClickHouse-Extras/libc-headers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/AmokHuginnsson/replxx.git [submodule "contrib/avro"] path = contrib/avro url = https://github.com/ClickHouse-Extras/avro.git @@ -213,6 +213,7 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse-Extras/boringssl.git + branch = MergeWithUpstream [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse-Extras/NuRaft.git @@ -249,6 +250,9 @@ [submodule "contrib/magic_enum"] path = contrib/magic_enum url = https://github.com/Neargye/magic_enum +[submodule "contrib/libprotobuf-mutator"] + path = contrib/libprotobuf-mutator + url = https://github.com/google/libprotobuf-mutator [submodule "contrib/sysroot"] path = contrib/sysroot url = https://github.com/ClickHouse-Extras/sysroot.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 718aa751cc2..adaaa0f1bc7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -### ClickHouse release v21.10, 2021-10-14 +### ClickHouse release v21.10, 2021-10-16 #### Backward Incompatible Change diff --git a/CMakeLists.txt b/CMakeLists.txt index 685b2c25a0d..7808edeff9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,21 @@ if (ENABLE_FUZZING) message (STATUS "Fuzzing instrumentation enabled") set (FUZZER "libfuzzer") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdlib++") + set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) + set (ENABLE_LIBRARIES 0) + set (ENABLE_SSL 1) + set (USE_INTERNAL_SSL_LIBRARY 1) + set (USE_UNWIND ON) + set (ENABLE_EMBEDDED_COMPILER 0) + set (ENABLE_EXAMPLES 0) + set (ENABLE_UTILS 0) + set (ENABLE_THINLTO 0) + set (ENABLE_TCMALLOC 0) + set (ENABLE_JEMALLOC 0) + set 
(ENABLE_CHECK_HEAVY_BUILDS 1) + set (GLIBC_COMPATIBILITY OFF) + set (ENABLE_PROTOBUF ON) + set (USE_INTERNAL_PROTOBUF_LIBRARY ON) endif() # Global libraries @@ -188,7 +203,7 @@ endif () option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) -if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND CMAKE_VERSION VERSION_GREATER "3.9.0") +if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND NOT UNBUNDLED AND MAKE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) # Only for Linux, x86_64 or aarch64. option(GLIBC_COMPATIBILITY "Enable compatibility with older glibc libraries." ON) elseif(GLIBC_COMPATIBILITY) @@ -203,10 +218,6 @@ if (GLIBC_COMPATIBILITY) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${CMAKE_CURRENT_SOURCE_DIR}/base/glibc-compatibility/glibc-compat-2.32.h") endif() -if (NOT CMAKE_VERSION VERSION_GREATER "3.9.0") - message (WARNING "CMake version must be greater than 3.9.0 for production builds.") -endif () - # Make sure the final executable has symbols exported set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") @@ -582,6 +593,7 @@ include (cmake/find/cassandra.cmake) include (cmake/find/sentry.cmake) include (cmake/find/stats.cmake) include (cmake/find/datasketches.cmake) +include (cmake/find/libprotobuf-mutator.cmake) set (USE_INTERNAL_CITYHASH_LIBRARY ON CACHE INTERNAL "") find_contrib_lib(cityhash) diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp index 8600f4c7b65..d325154ee61 100644 --- a/base/base/LineReader.cpp +++ b/base/base/LineReader.cpp @@ -5,6 +5,10 @@ #include #include +#include +#include +#include + #ifdef OS_LINUX /// We can detect if code is linked with one or another readline variants or open the library dynamically. diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 9bf6ec41255..38867faf5d5 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -177,6 +177,10 @@ ReplxxLineReader::ReplxxLineReader( /// bind C-p/C-n to history-previous/history-next like readline. rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + + /// bind C-j to ENTER action. + rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }); + /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but /// it also binded to M-p/M-n). 
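Editor's note on the ReplxxLineReader change above: the new Ctrl-J binding reuses the same bind_key/invoke pattern that the existing Ctrl-N/Ctrl-P history bindings use. Below is a minimal standalone sketch of that pattern, not ClickHouse code; it assumes only the replxx library (header replxx.hxx) and mirrors the calls visible in the hunk.

#include <replxx.hxx>

int main()
{
    using Replxx = replxx::Replxx;
    Replxx rx;

    // Re-bind keys to built-in actions, following the pattern used by ReplxxLineReader.
    rx.bind_key(Replxx::KEY::control('N'), [&rx](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); });
    rx.bind_key(Replxx::KEY::control('P'), [&rx](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); });

    // The binding added in this diff: Ctrl-J commits the current line, like Enter.
    rx.bind_key(Replxx::KEY::control('J'), [&rx](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); });

    // Simple read loop: input() returns nullptr on EOF.
    while (char const * line = rx.input("prompt> "))
        rx.history_add(line);

    return 0;
}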
diff --git a/base/base/phdr_cache.cpp b/base/base/phdr_cache.cpp index d2388666f73..8ae10f6bf83 100644 --- a/base/base/phdr_cache.cpp +++ b/base/base/phdr_cache.cpp @@ -6,7 +6,7 @@ #include -#if defined(__linux__) && !defined(THREAD_SANITIZER) +#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL) #define USE_PHDR_CACHE 1 #endif diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index 80e62d0a6d6..0f41296819e 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -84,7 +84,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, log_file); log->setLevel(log_level); - split->addChannel(log); + split->addChannel(log, "log"); } const auto errorlog_path = config.getString("logger.errorlog", ""); @@ -116,7 +116,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr errorlog = new DB::OwnFormattingChannel(pf, error_log_file); errorlog->setLevel(errorlog_level); errorlog->open(); - split->addChannel(errorlog); + split->addChannel(errorlog, "errorlog"); } if (config.getBool("logger.use_syslog", false)) @@ -155,7 +155,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, syslog_channel); log->setLevel(syslog_level); - split->addChannel(log); + split->addChannel(log, "syslog"); } bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); @@ -177,7 +177,7 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log Poco::AutoPtr log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel); logger.warning("Logging " + console_log_level_string + " to console"); log->setLevel(console_log_level); - split->addChannel(log); + split->addChannel(log, "console"); } split->open(); @@ -224,6 +224,89 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log } } +void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger) +{ + int max_log_level = 0; + + const auto log_level_string = config.getString("logger.level", "trace"); + int log_level = Poco::Logger::parseLevel(log_level_string); + if (log_level > max_log_level) + max_log_level = log_level; + + const auto log_path = config.getString("logger.log", ""); + if (!log_path.empty()) + split->setLevel("log", log_level); + else + split->setLevel("log", 0); + + // Set level to console + bool is_daemon = config.getBool("application.runAsDaemon", false); + bool should_log_to_console = isatty(STDIN_FILENO) || isatty(STDERR_FILENO); + if (config.getBool("logger.console", false) + || (!config.hasProperty("logger.console") && !is_daemon && should_log_to_console)) + split->setLevel("console", log_level); + else + split->setLevel("console", 0); + + // Set level to errorlog + int errorlog_level = 0; + const auto errorlog_path = config.getString("logger.errorlog", ""); + if (!errorlog_path.empty()) + { + errorlog_level = Poco::Logger::parseLevel(config.getString("logger.errorlog_level", "notice")); + if (errorlog_level > max_log_level) + max_log_level = errorlog_level; + } + split->setLevel("errorlog", errorlog_level); + + // Set level to syslog + int syslog_level = 0; + if (config.getBool("logger.use_syslog", false)) + { + syslog_level = Poco::Logger::parseLevel(config.getString("logger.syslog_level", log_level_string)); + if (syslog_level > max_log_level) + max_log_level = syslog_level; + } + 
split->setLevel("syslog", syslog_level); + + // Global logging level (it can be overridden for specific loggers). + logger.setLevel(max_log_level); + + // Set level to all already created loggers + std::vector names; + + logger.root().names(names); + for (const auto & name : names) + logger.root().get(name).setLevel(max_log_level); + + logger.root().setLevel(max_log_level); + + // Explicitly specified log levels for specific loggers. + { + Poco::Util::AbstractConfiguration::Keys loggers_level; + config.keys("logger.levels", loggers_level); + + if (!loggers_level.empty()) + { + for (const auto & key : loggers_level) + { + if (key == "logger" || key.starts_with("logger[")) + { + const std::string name(config.getString("logger.levels." + key + ".name")); + const std::string level(config.getString("logger.levels." + key + ".level")); + logger.root().get(name).setLevel(level); + } + else + { + // Legacy syntax + const std::string level(config.getString("logger.levels." + key, "trace")); + logger.root().get(key).setLevel(level); + } + } + } + } +} + void Loggers::closeLogs(Poco::Logger & logger) { if (log_file) diff --git a/base/loggers/Loggers.h b/base/loggers/Loggers.h index 151c1d3566f..e8afd749534 100644 --- a/base/loggers/Loggers.h +++ b/base/loggers/Loggers.h @@ -19,6 +19,8 @@ class Loggers public: void buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger, const std::string & cmd_name = ""); + void updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger); + /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); diff --git a/base/loggers/OwnFormattingChannel.h b/base/loggers/OwnFormattingChannel.h index 2336dacad04..0480d0d5061 100644 --- a/base/loggers/OwnFormattingChannel.h +++ b/base/loggers/OwnFormattingChannel.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -14,7 +15,7 @@ class OwnFormattingChannel : public Poco::Channel, public ExtendedLogChannel public: explicit OwnFormattingChannel( Poco::AutoPtr pFormatter_ = nullptr, Poco::AutoPtr pChannel_ = nullptr) - : pFormatter(std::move(pFormatter_)), pChannel(std::move(pChannel_)) + : pFormatter(std::move(pFormatter_)), pChannel(std::move(pChannel_)), priority(Poco::Message::PRIO_TRACE) { } @@ -45,7 +46,7 @@ public: private: Poco::AutoPtr pFormatter; Poco::AutoPtr pChannel; - Poco::Message::Priority priority = Poco::Message::PRIO_TRACE; + std::atomic priority; }; } diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 2349c60856f..2ae1e65729c 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -1,4 +1,5 @@ #include "OwnSplitChannel.h" +#include "OwnFormattingChannel.h" #include #include @@ -75,7 +76,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) ExtendedLogMessage msg_ext = ExtendedLogMessage::getFrom(msg); /// Log data to child channels - for (auto & channel : channels) + for (auto & [name, channel] : channels) { if (channel.second) channel.second->logExtended(msg_ext); // extended child @@ -137,9 +138,9 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) } -void OwnSplitChannel::addChannel(Poco::AutoPtr channel) +void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std::string & name) { - channels.emplace_back(std::move(channel), dynamic_cast(channel.get())); + channels.emplace(name, ExtendedChannelPtrPair(std::move(channel), dynamic_cast(channel.get()))); } void OwnSplitChannel::addTextLog(std::shared_ptr log, int 
max_priority) @@ -149,4 +150,14 @@ void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_prior text_log_max_priority.store(max_priority, std::memory_order_relaxed); } +void OwnSplitChannel::setLevel(const std::string & name, int level) +{ + auto it = channels.find(name); + if (it != channels.end()) + { + if (auto * channel = dynamic_cast(it->second.first.get())) + channel->setLevel(level); + } +} + } diff --git a/base/loggers/OwnSplitChannel.h b/base/loggers/OwnSplitChannel.h index 03ff7b57745..fdc580e65f8 100644 --- a/base/loggers/OwnSplitChannel.h +++ b/base/loggers/OwnSplitChannel.h @@ -18,10 +18,12 @@ public: /// Makes an extended message from msg and passes it to the client logs queue and child (if possible) void log(const Poco::Message & msg) override; /// Adds a child channel - void addChannel(Poco::AutoPtr channel); + void addChannel(Poco::AutoPtr channel, const std::string & name); void addTextLog(std::shared_ptr log, int max_priority); + void setLevel(const std::string & name, int level); + private: void logSplit(const Poco::Message & msg); void tryLogSplit(const Poco::Message & msg); @@ -29,7 +31,7 @@ private: using ChannelPtr = Poco::AutoPtr; /// Handler and its pointer casted to extended interface using ExtendedChannelPtrPair = std::pair; - std::vector channels; + std::map channels; std::mutex text_log_mutex; diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 76328e1509d..69e05cf804b 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -13,6 +13,7 @@ TRIES=3 AMD64_BIN_URL="https://builds.clickhouse.com/master/amd64/clickhouse" AARCH64_BIN_URL="https://builds.clickhouse.com/master/aarch64/clickhouse" +POWERPC64_BIN_URL="https://builds.clickhouse.com/master/ppc64le/clickhouse" # Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. @@ -38,6 +39,8 @@ if [[ ! -f clickhouse ]]; then $FASTER_DOWNLOAD "$AMD64_BIN_URL" elif [[ $CPU == aarch64 ]]; then $FASTER_DOWNLOAD "$AARCH64_BIN_URL" + elif [[ $CPU == powerpc64le ]]; then + $FASTER_DOWNLOAD "$POWERPC64_BIN_URL" else echo "Unsupported CPU type: $CPU" exit 1 @@ -52,7 +55,7 @@ fi if [[ ! -d data ]]; then if [[ ! -f $DATASET ]]; then - $FASTER_DOWNLOAD "https://clickhouse-datasets.s3.yandex.net/hits/partitions/$DATASET" + $FASTER_DOWNLOAD "https://datasets.clickhouse.com/hits/partitions/$DATASET" fi tar $TAR_PARAMS --strip-components=1 --directory=. -x -v -f $DATASET diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 46e42329958..330ab10f1bf 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -18,6 +18,10 @@ option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1) option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1) option (ENABLE_AVX "Use AVX instructions on x86_64" 0) option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0) +option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0) +option (ENABLE_BMI "Use BMI instructions on x86_64" 0) +option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0) +option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0) option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 
0) @@ -127,6 +131,57 @@ else () if (HAVE_AVX2 AND ENABLE_AVX2) set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () + + set (TEST_FLAG "-mavx512f -mavx512bw") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _mm512_setzero_epi32(); + (void)a; + auto b = _mm512_add_epi16(__m512i(), __m512i()); + (void)b; + return 0; + } + " HAVE_AVX512) + if (HAVE_AVX512 AND ENABLE_AVX512) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + + set (TEST_FLAG "-mbmi") + set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") + check_cxx_source_compiles(" + #include + int main() { + auto a = _blsr_u32(0); + (void)a; + return 0; + } + " HAVE_BMI) + if (HAVE_BMI AND ENABLE_BMI) + set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") + endif () + +#Limit avx2/avx512 flag for specific source build + set (X86_INTRINSICS_FLAGS "") + if (ENABLE_AVX2_FOR_SPEC_OP) + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX AND HAVE_AVX2) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx -mavx2") + endif () + endif () + + if (ENABLE_AVX512_FOR_SPEC_OP) + set (X86_INTRINSICS_FLAGS "") + if (HAVE_BMI) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi") + endif () + if (HAVE_AVX512) + set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw") + endif () + endif () endif () cmake_pop_check_state () diff --git a/cmake/find/amqpcpp.cmake b/cmake/find/amqpcpp.cmake index 05e5d2da751..374e6dd6d7e 100644 --- a/cmake/find/amqpcpp.cmake +++ b/cmake/find/amqpcpp.cmake @@ -10,7 +10,7 @@ if (NOT ENABLE_AMQPCPP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt") - message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/AMQP-CPP is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal AMQP-CPP library") set (USE_AMQPCPP 0) return() diff --git a/cmake/find/avro.cmake b/cmake/find/avro.cmake index 74ccda3489f..351fa15d2d3 100644 --- a/cmake/find/avro.cmake +++ b/cmake/find/avro.cmake @@ -13,7 +13,7 @@ option (USE_INTERNAL_AVRO_LIBRARY if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/avro/lang/c++/CMakeLists.txt") if (USE_INTERNAL_AVRO_LIBRARY) - message(WARNING "submodule contrib/avro is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/avro is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal avro") set(USE_INTERNAL_AVRO_LIBRARY 0) endif() diff --git a/cmake/find/base64.cmake b/cmake/find/base64.cmake index acade11eb2f..ee12fbb11ba 100644 --- a/cmake/find/base64.cmake +++ b/cmake/find/base64.cmake @@ -10,11 +10,11 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64/LICENSE") set (MISSING_INTERNAL_BASE64_LIBRARY 1) - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init") endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/base64") - message (WARNING "submodule contrib/base64 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/base64 is missing. 
to fix try run: \n git submodule update --init") else() set (BASE64_LIBRARY base64) set (USE_BASE64 1) diff --git a/cmake/find/brotli.cmake b/cmake/find/brotli.cmake index bf498802922..4b2ee3d6de0 100644 --- a/cmake/find/brotli.cmake +++ b/cmake/find/brotli.cmake @@ -16,7 +16,7 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/brotli/c/include/brotli/decode.h") if (USE_INTERNAL_BROTLI_LIBRARY) - message (WARNING "submodule contrib/brotli is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/brotli is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot find internal brotli") set (USE_INTERNAL_BROTLI_LIBRARY 0) endif () diff --git a/cmake/find/bzip2.cmake b/cmake/find/bzip2.cmake index 15532a67c00..5e6a6fb5841 100644 --- a/cmake/find/bzip2.cmake +++ b/cmake/find/bzip2.cmake @@ -6,7 +6,7 @@ if (NOT ENABLE_BZIP2) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h") - message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library") set (USE_NLP 0) return() diff --git a/cmake/find/capnp.cmake b/cmake/find/capnp.cmake index ee4735bd175..3d0d767bd0c 100644 --- a/cmake/find/capnp.cmake +++ b/cmake/find/capnp.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_CAPNP_LIBRARY "Set to FALSE to use system capnproto library if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/capnproto/CMakeLists.txt") if(USE_INTERNAL_CAPNP_LIBRARY) - message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/capnproto is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal capnproto") set(USE_INTERNAL_CAPNP_LIBRARY 0) endif() @@ -34,8 +34,6 @@ endif() if (CAPNP_LIBRARIES) set (USE_CAPNP 1) elseif(NOT MISSING_INTERNAL_CAPNP_LIBRARY) - add_subdirectory(contrib/capnproto-cmake) - set (CAPNP_LIBRARIES capnpc) set (USE_CAPNP 1) set (USE_INTERNAL_CAPNP_LIBRARY 1) diff --git a/cmake/find/cassandra.cmake b/cmake/find/cassandra.cmake index b6e97ff5ef8..7fcbdbb90a5 100644 --- a/cmake/find/cassandra.cmake +++ b/cmake/find/cassandra.cmake @@ -14,7 +14,7 @@ if (APPLE) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cassandra") - message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/cassandra is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal Cassandra") set (USE_CASSANDRA 0) return() diff --git a/cmake/find/cxx.cmake b/cmake/find/cxx.cmake index b1da125e219..f38ac77b1ea 100644 --- a/cmake/find/cxx.cmake +++ b/cmake/find/cxx.cmake @@ -17,7 +17,7 @@ option (USE_INTERNAL_LIBCXX_LIBRARY "Disable to use system libcxx and libcxxabi if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/CMakeLists.txt") if (USE_INTERNAL_LIBCXX_LIBRARY) - message(WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/libcxx is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libcxx") set(USE_INTERNAL_LIBCXX_LIBRARY 0) endif() diff --git a/cmake/find/cyrus-sasl.cmake b/cmake/find/cyrus-sasl.cmake index 974b8148fdc..f0c088995b0 100644 --- a/cmake/find/cyrus-sasl.cmake +++ b/cmake/find/cyrus-sasl.cmake @@ -6,7 +6,7 @@ endif() OPTION(ENABLE_CYRUS_SASL "Enable cyrus-sasl" ${DEFAULT_ENABLE_CYRUS_SASL}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cyrus-sasl/README") - message (WARNING "submodule contrib/cyrus-sasl is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/cyrus-sasl is missing. to fix try run: \n git submodule update --init") set (ENABLE_CYRUS_SASL 0) endif () diff --git a/cmake/find/datasketches.cmake b/cmake/find/datasketches.cmake index 44ef324a9f2..2d7e644890a 100644 --- a/cmake/find/datasketches.cmake +++ b/cmake/find/datasketches.cmake @@ -6,7 +6,7 @@ option (USE_INTERNAL_DATASKETCHES_LIBRARY "Set to FALSE to use system DataSketch if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/datasketches-cpp/theta/CMakeLists.txt") if (USE_INTERNAL_DATASKETCHES_LIBRARY) - message(WARNING "submodule contrib/datasketches-cpp is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/datasketches-cpp is missing. to fix try run: \n git submodule update --init") endif() set(MISSING_INTERNAL_DATASKETCHES_LIBRARY 1) set(USE_INTERNAL_DATASKETCHES_LIBRARY 0) diff --git a/cmake/find/fast_float.cmake b/cmake/find/fast_float.cmake index 4b215c710ad..3e8b7cc5280 100644 --- a/cmake/find/fast_float.cmake +++ b/cmake/find/fast_float.cmake @@ -1,5 +1,5 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fast_float/include/fast_float/fast_float.h") - message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init --recursive") + message (FATAL_ERROR "submodule contrib/fast_float is missing. to fix try run: \n git submodule update --init") endif () set(FAST_FLOAT_LIBRARY fast_float) diff --git a/cmake/find/fastops.cmake b/cmake/find/fastops.cmake index 1675646654e..72426eb5912 100644 --- a/cmake/find/fastops.cmake +++ b/cmake/find/fastops.cmake @@ -10,7 +10,7 @@ if(NOT ENABLE_FASTOPS) endif() if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/fastops/fastops/fastops.h") - message(WARNING "submodule contrib/fastops is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/fastops is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal fastops library") set(MISSING_INTERNAL_FASTOPS_LIBRARY 1) endif() diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake index 1e440b3b350..f4b280876ef 100644 --- a/cmake/find/grpc.cmake +++ b/cmake/find/grpc.cmake @@ -26,7 +26,7 @@ option(USE_INTERNAL_GRPC_LIBRARY "Set to FALSE to use system gRPC library instea if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/grpc/CMakeLists.txt") if(USE_INTERNAL_GRPC_LIBRARY) - message(WARNING "submodule contrib/grpc is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/grpc is missing. 
to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal grpc") set(USE_INTERNAL_GRPC_LIBRARY 0) endif() diff --git a/cmake/find/gtest.cmake b/cmake/find/gtest.cmake index 9d4ab2608cb..c5f987d7368 100644 --- a/cmake/find/gtest.cmake +++ b/cmake/find/gtest.cmake @@ -4,7 +4,7 @@ option (USE_INTERNAL_GTEST_LIBRARY "Set to FALSE to use system Google Test inste if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/googletest/googletest/CMakeLists.txt") if (USE_INTERNAL_GTEST_LIBRARY) - message (WARNING "submodule contrib/googletest is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/googletest is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal gtest") set (USE_INTERNAL_GTEST_LIBRARY 0) endif () diff --git a/cmake/find/h3.cmake b/cmake/find/h3.cmake index 03b6f32fc3c..e692b431e90 100644 --- a/cmake/find/h3.cmake +++ b/cmake/find/h3.cmake @@ -11,7 +11,7 @@ option(USE_INTERNAL_H3_LIBRARY "Set to FALSE to use system h3 library instead of if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/h3/src/h3lib/include/h3Index.h") if(USE_INTERNAL_H3_LIBRARY) - message(WARNING "submodule contrib/h3 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/h3 is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal h3 library") set(USE_INTERNAL_H3_LIBRARY 0) endif() diff --git a/cmake/find/hdfs3.cmake b/cmake/find/hdfs3.cmake index 3aab2b612ef..aac6b99dfa2 100644 --- a/cmake/find/hdfs3.cmake +++ b/cmake/find/hdfs3.cmake @@ -16,7 +16,7 @@ option(USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of b if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") if(USE_INTERNAL_HDFS3_LIBRARY) - message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/libhdfs3 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal HDFS3 library") set(USE_INTERNAL_HDFS3_LIBRARY 0) endif() diff --git a/cmake/find/icu.cmake b/cmake/find/icu.cmake index 40fb391656d..0b775a68eda 100644 --- a/cmake/find/icu.cmake +++ b/cmake/find/icu.cmake @@ -16,7 +16,7 @@ option (USE_INTERNAL_ICU_LIBRARY "Set to FALSE to use system ICU library instead if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/icu/icu4c/LICENSE") if (USE_INTERNAL_ICU_LIBRARY) - message (WARNING "submodule contrib/icu is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/icu is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ICU") set (USE_INTERNAL_ICU_LIBRARY 0) endif () diff --git a/cmake/find/krb5.cmake b/cmake/find/krb5.cmake index 49b7462b710..24cc51325dc 100644 --- a/cmake/find/krb5.cmake +++ b/cmake/find/krb5.cmake @@ -1,7 +1,7 @@ OPTION(ENABLE_KRB5 "Enable krb5" ${ENABLE_LIBRARIES}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/krb5/README") - message (WARNING "submodule contrib/krb5 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/krb5 is missing. 
to fix try run: \n git submodule update --init") set (ENABLE_KRB5 0) endif () diff --git a/cmake/find/ldap.cmake b/cmake/find/ldap.cmake index 71222d26c66..0d14e2c4199 100644 --- a/cmake/find/ldap.cmake +++ b/cmake/find/ldap.cmake @@ -15,7 +15,7 @@ option (USE_INTERNAL_LDAP_LIBRARY "Set to FALSE to use system *LDAP library inst if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/openldap/README") if (USE_INTERNAL_LDAP_LIBRARY) - message (WARNING "Submodule contrib/openldap is missing. To fix try running:\n git submodule update --init --recursive") + message (WARNING "Submodule contrib/openldap is missing. To fix try running:\n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal LDAP library") endif () diff --git a/cmake/find/libgsasl.cmake b/cmake/find/libgsasl.cmake index 3c742af2566..3aec5c0c30a 100644 --- a/cmake/find/libgsasl.cmake +++ b/cmake/find/libgsasl.cmake @@ -16,7 +16,7 @@ endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src/gsasl.h") if (USE_INTERNAL_LIBGSASL_LIBRARY) - message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libgsasl") set (USE_INTERNAL_LIBGSASL_LIBRARY 0) endif () diff --git a/cmake/find/libpqxx.cmake b/cmake/find/libpqxx.cmake index f981df19aaa..b2a1e217b10 100644 --- a/cmake/find/libpqxx.cmake +++ b/cmake/find/libpqxx.cmake @@ -5,14 +5,14 @@ if (NOT ENABLE_LIBPQXX) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/CMakeLists.txt") - message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libpqxx is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpqxx library") set (USE_LIBPQXX 0) return() endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpq/include") - message (ERROR "submodule contrib/libpq is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/libpq is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpq needed for libpqxx") set (USE_LIBPQXX 0) return() diff --git a/cmake/find/libprotobuf-mutator.cmake b/cmake/find/libprotobuf-mutator.cmake new file mode 100644 index 00000000000..a308db67c8b --- /dev/null +++ b/cmake/find/libprotobuf-mutator.cmake @@ -0,0 +1,11 @@ +option(USE_LIBPROTOBUF_MUTATOR "Enable libprotobuf-mutator" ${ENABLE_FUZZING}) + +if (NOT USE_LIBPROTOBUF_MUTATOR) + return() +endif() + +set(LibProtobufMutator_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator") + +if (NOT EXISTS "${LibProtobufMutator_SOURCE_DIR}/README.md") + message (ERROR "submodule contrib/libprotobuf-mutator is missing. to fix try run: \n git submodule update --init") +endif() diff --git a/cmake/find/libuv.cmake b/cmake/find/libuv.cmake index f0023209309..c94dfd50b76 100644 --- a/cmake/find/libuv.cmake +++ b/cmake/find/libuv.cmake @@ -5,7 +5,7 @@ if (OS_DARWIN AND COMPILER_GCC) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libuv") - message (WARNING "submodule contrib/libuv is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libuv is missing. 
to fix try run: \n git submodule update --init") SET(MISSING_INTERNAL_LIBUV_LIBRARY 1) return() endif() diff --git a/cmake/find/libxml2.cmake b/cmake/find/libxml2.cmake index cdf079c33d2..8f7e79d84c9 100644 --- a/cmake/find/libxml2.cmake +++ b/cmake/find/libxml2.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h") if (USE_INTERNAL_LIBXML2_LIBRARY) - message (WARNING "submodule contrib/libxml2 is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libxml2 is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libxml") set (USE_INTERNAL_LIBXML2_LIBRARY 0) endif () diff --git a/cmake/find/llvm.cmake b/cmake/find/llvm.cmake index 84ac29991ab..ece5d5434a0 100644 --- a/cmake/find/llvm.cmake +++ b/cmake/find/llvm.cmake @@ -12,7 +12,7 @@ if (NOT ENABLE_EMBEDDED_COMPILER) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/CMakeLists.txt") - message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init --recursive") + message (${RECONFIGURE_MESSAGE_LEVEL} "submodule /contrib/llvm is missing. to fix try run: \n git submodule update --init") endif () set (USE_EMBEDDED_COMPILER 1) diff --git a/cmake/find/msgpack.cmake b/cmake/find/msgpack.cmake index 130aa007ad5..c15fedd0e30 100644 --- a/cmake/find/msgpack.cmake +++ b/cmake/find/msgpack.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_MSGPACK_LIBRARY "Set to FALSE to use system msgpack library if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/msgpack-c/include/msgpack.hpp") if(USE_INTERNAL_MSGPACK_LIBRARY) - message(WARNING "Submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init --recursive") + message(WARNING "Submodule contrib/msgpack-c is missing. To fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use internal msgpack") set(USE_INTERNAL_MSGPACK_LIBRARY 0) endif() diff --git a/cmake/find/mysqlclient.cmake b/cmake/find/mysqlclient.cmake index 634681d98f6..0af03676d71 100644 --- a/cmake/find/mysqlclient.cmake +++ b/cmake/find/mysqlclient.cmake @@ -16,7 +16,7 @@ option(USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient librar if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README") if(USE_INTERNAL_MYSQL_LIBRARY) - message(WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal mysql library") set(USE_INTERNAL_MYSQL_LIBRARY 0) endif() diff --git a/cmake/find/nanodbc.cmake b/cmake/find/nanodbc.cmake index 894a2a60bad..d48e294c9e5 100644 --- a/cmake/find/nanodbc.cmake +++ b/cmake/find/nanodbc.cmake @@ -7,7 +7,7 @@ if (NOT USE_INTERNAL_NANODBC_LIBRARY) endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt") - message (FATAL_ERROR "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive") + message (FATAL_ERROR "submodule contrib/nanodbc is missing. 
to fix try run: \n git submodule update --init") endif() set (NANODBC_LIBRARY nanodbc) diff --git a/cmake/find/nlp.cmake b/cmake/find/nlp.cmake index f1204a85dea..efa9b39ddae 100644 --- a/cmake/find/nlp.cmake +++ b/cmake/find/nlp.cmake @@ -7,21 +7,21 @@ if (NOT ENABLE_NLP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libstemmer_c/Makefile") - message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/libstemmer_c is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libstemmer_c library, NLP functions will be disabled") set (USE_NLP 0) return() endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/wordnet-blast/CMakeLists.txt") - message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/wordnet-blast is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal wordnet-blast library, NLP functions will be disabled") set (USE_NLP 0) return() endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lemmagen-c/README.md") - message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/lemmagen-c is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal lemmagen-c library, NLP functions will be disabled") set (USE_NLP 0) return() diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake index 4e5258e132f..59caa9e7373 100644 --- a/cmake/find/nuraft.cmake +++ b/cmake/find/nuraft.cmake @@ -5,7 +5,7 @@ if (NOT ENABLE_NURAFT) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt") - message (WARNING "submodule contrib/NuRaft is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/NuRaft is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal NuRaft library") set (USE_NURAFT 0) return() diff --git a/cmake/find/orc.cmake b/cmake/find/orc.cmake index 01734224a6a..a5c3f57468a 100644 --- a/cmake/find/orc.cmake +++ b/cmake/find/orc.cmake @@ -18,7 +18,7 @@ include(cmake/find/snappy.cmake) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/orc/c++/include/orc/OrcFile.hh") if(USE_INTERNAL_ORC_LIBRARY) - message(WARNING "submodule contrib/orc is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/orc is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ORC") set(USE_INTERNAL_ORC_LIBRARY 0) endif() diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index eb1b529fbfe..4b56a829df5 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -20,7 +20,7 @@ endif() if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt") if(USE_INTERNAL_PARQUET_LIBRARY) - message(WARNING "submodule contrib/arrow (required for Parquet) is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/arrow (required for Parquet) is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal parquet library") set(USE_INTERNAL_PARQUET_LIBRARY 0) endif() diff --git a/cmake/find/protobuf.cmake b/cmake/find/protobuf.cmake index eb9fbe3edef..096288fd2ab 100644 --- a/cmake/find/protobuf.cmake +++ b/cmake/find/protobuf.cmake @@ -15,7 +15,7 @@ option(USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instea if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/protobuf/cmake/CMakeLists.txt") if(USE_INTERNAL_PROTOBUF_LIBRARY) - message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/protobuf is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "Can't use internal protobuf") set(USE_INTERNAL_PROTOBUF_LIBRARY 0) endif() diff --git a/cmake/find/rapidjson.cmake b/cmake/find/rapidjson.cmake index f880d19143e..62db4695c58 100644 --- a/cmake/find/rapidjson.cmake +++ b/cmake/find/rapidjson.cmake @@ -10,7 +10,7 @@ option(USE_INTERNAL_RAPIDJSON_LIBRARY "Set to FALSE to use system rapidjson libr if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rapidjson/include/rapidjson/rapidjson.h") if(USE_INTERNAL_RAPIDJSON_LIBRARY) - message(WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/rapidjson is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rapidjson library") set(USE_INTERNAL_RAPIDJSON_LIBRARY 0) endif() diff --git a/cmake/find/rdkafka.cmake b/cmake/find/rdkafka.cmake index f6460c1d9a3..5b370a42cdc 100644 --- a/cmake/find/rdkafka.cmake +++ b/cmake/find/rdkafka.cmake @@ -11,7 +11,7 @@ option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka inst if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/cppkafka/CMakeLists.txt") if(USE_INTERNAL_RDKAFKA_LIBRARY) - message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/cppkafka is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal cppkafka") set (USE_INTERNAL_RDKAFKA_LIBRARY 0) endif() @@ -20,7 +20,7 @@ endif () if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/CMakeLists.txt") if(USE_INTERNAL_RDKAFKA_LIBRARY OR MISSING_INTERNAL_CPPKAFKA_LIBRARY) - message (WARNING "submodule contrib/librdkafka is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/librdkafka is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal rdkafka") set (USE_INTERNAL_RDKAFKA_LIBRARY 0) endif() diff --git a/cmake/find/re2.cmake b/cmake/find/re2.cmake index 87bc974c788..09240f33f7d 100644 --- a/cmake/find/re2.cmake +++ b/cmake/find/re2.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_RE2_LIBRARY "Set to FALSE to use system re2 library instead if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/re2/CMakeLists.txt") if(USE_INTERNAL_RE2_LIBRARY) - message(WARNING "submodule contrib/re2 is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/re2 is missing. 
to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal re2 library") endif() set(USE_INTERNAL_RE2_LIBRARY 0) diff --git a/cmake/find/rocksdb.cmake b/cmake/find/rocksdb.cmake index 94278a603d7..109eabc271b 100644 --- a/cmake/find/rocksdb.cmake +++ b/cmake/find/rocksdb.cmake @@ -15,7 +15,7 @@ option(USE_INTERNAL_ROCKSDB_LIBRARY "Set to FALSE to use system ROCKSDB library if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/rocksdb/CMakeLists.txt") if (USE_INTERNAL_ROCKSDB_LIBRARY) - message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib is missing. to fix try run: \n git submodule update --init") message(${RECONFIGURE_MESSAGE_LEVEL} "cannot find internal rocksdb") endif() set (MISSING_INTERNAL_ROCKSDB 1) diff --git a/cmake/find/s2geometry.cmake b/cmake/find/s2geometry.cmake index 2364c6ba193..348805b342e 100644 --- a/cmake/find/s2geometry.cmake +++ b/cmake/find/s2geometry.cmake @@ -3,7 +3,7 @@ option(ENABLE_S2_GEOMETRY "Enable S2 geometry library" ${ENABLE_LIBRARIES}) if (ENABLE_S2_GEOMETRY) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/s2geometry") - message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/s2geometry is missing. to fix try run: \n git submodule update --init") set (ENABLE_S2_GEOMETRY 0) set (USE_S2_GEOMETRY 0) else() diff --git a/cmake/find/s3.cmake b/cmake/find/s3.cmake index a2ed3e416d0..9a10c3f13ef 100644 --- a/cmake/find/s3.cmake +++ b/cmake/find/s3.cmake @@ -23,7 +23,7 @@ if (NOT USE_INTERNAL_AWS_S3_LIBRARY) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/aws/aws-cpp-sdk-s3") - message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/aws is missing. to fix try run: \n git submodule update --init") if (USE_INTERNAL_AWS_S3_LIBRARY) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal S3 library") endif () diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index a986599abce..4283e75f9ef 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -2,7 +2,7 @@ set (SENTRY_LIBRARY "sentry") set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include") if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") - message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init") if (USE_SENTRY) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sentry library") endif() diff --git a/cmake/find/simdjson.cmake b/cmake/find/simdjson.cmake index cffe20bdb2d..bf22a331f04 100644 --- a/cmake/find/simdjson.cmake +++ b/cmake/find/simdjson.cmake @@ -1,7 +1,7 @@ option (USE_SIMDJSON "Use simdjson" ${ENABLE_LIBRARIES}) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h") - message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/simdjson is missing. 
to fix try run: \n git submodule update --init") if (USE_SIMDJSON) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal simdjson library") endif() diff --git a/cmake/find/sqlite.cmake b/cmake/find/sqlite.cmake index cfa33fdebbb..083a9faea59 100644 --- a/cmake/find/sqlite.cmake +++ b/cmake/find/sqlite.cmake @@ -5,7 +5,7 @@ if (NOT ENABLE_SQLITE) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/sqlite-amalgamation/sqlite3.c") - message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/sqlite3-amalgamation is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal sqlite library") set (USE_SQLITE 0) return() diff --git a/cmake/find/ssl.cmake b/cmake/find/ssl.cmake index fdc0bfb27d3..fb411b93593 100644 --- a/cmake/find/ssl.cmake +++ b/cmake/find/ssl.cmake @@ -13,7 +13,7 @@ option(USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boringssl/README.md") if(USE_INTERNAL_SSL_LIBRARY) - message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/boringssl is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal ssl library") endif() set(USE_INTERNAL_SSL_LIBRARY 0) diff --git a/cmake/find/stats.cmake b/cmake/find/stats.cmake index dea108ed920..589da1603d5 100644 --- a/cmake/find/stats.cmake +++ b/cmake/find/stats.cmake @@ -2,11 +2,11 @@ option(ENABLE_STATS "Enable StatsLib library" ${ENABLE_LIBRARIES}) if (ENABLE_STATS) if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/stats") - message (WARNING "submodule contrib/stats is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/stats is missing. to fix try run: \n git submodule update --init") set (ENABLE_STATS 0) set (USE_STATS 0) elseif (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/gcem") - message (WARNING "submodule contrib/gcem is missing. to fix try run: \n git submodule update --init --recursive") + message (WARNING "submodule contrib/gcem is missing. to fix try run: \n git submodule update --init") set (ENABLE_STATS 0) set (USE_STATS 0) else() diff --git a/cmake/find/xz.cmake b/cmake/find/xz.cmake index 0d19859c6b1..f4c230859bc 100644 --- a/cmake/find/xz.cmake +++ b/cmake/find/xz.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_XZ_LIBRARY "Set to OFF to use system xz (lzma) library inst if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/xz/src/liblzma/api/lzma.h") if(USE_INTERNAL_XZ_LIBRARY) - message(WARNING "submodule contrib/xz is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/xz is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal xz (lzma) library") set(USE_INTERNAL_XZ_LIBRARY 0) endif() diff --git a/cmake/find/yaml-cpp.cmake b/cmake/find/yaml-cpp.cmake index 4633d559980..2aba6808e31 100644 --- a/cmake/find/yaml-cpp.cmake +++ b/cmake/find/yaml-cpp.cmake @@ -5,5 +5,5 @@ if (NOT USE_YAML_CPP) endif() if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/yaml-cpp/README.md") - message (ERROR "submodule contrib/yaml-cpp is missing. to fix try run: \n git submodule update --init --recursive") + message (ERROR "submodule contrib/yaml-cpp is missing. 
to fix try run: \n git submodule update --init") endif() diff --git a/cmake/find/zlib.cmake b/cmake/find/zlib.cmake index f66f9e6713d..50a5bc63d15 100644 --- a/cmake/find/zlib.cmake +++ b/cmake/find/zlib.cmake @@ -12,7 +12,7 @@ endif () if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/${INTERNAL_ZLIB_NAME}/zlib.h") if(USE_INTERNAL_ZLIB_LIBRARY) - message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/${INTERNAL_ZLIB_NAME} is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zlib library") endif() set(USE_INTERNAL_ZLIB_LIBRARY 0) diff --git a/cmake/find/zstd.cmake b/cmake/find/zstd.cmake index b12bb701e0e..812e1eed139 100644 --- a/cmake/find/zstd.cmake +++ b/cmake/find/zstd.cmake @@ -2,7 +2,7 @@ option (USE_INTERNAL_ZSTD_LIBRARY "Set to FALSE to use system zstd library inste if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib/zstd.h") if(USE_INTERNAL_ZSTD_LIBRARY) - message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init --recursive") + message(WARNING "submodule contrib/zstd is missing. to fix try run: \n git submodule update --init") message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal zstd library") set(USE_INTERNAL_ZSTD_LIBRARY 0) endif() diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index a2da7ba1915..4abd0a951e1 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -14,6 +14,8 @@ endif () if (OS_ANDROID) # pthread and rt are included in libc set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -ldl") +elseif (USE_MUSL) + set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -static -lc") else () set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl") endif () @@ -26,7 +28,7 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) # glibc-compatibility library relies to constant version of libc headers # (because minor changes in function attributes between different glibc versions will introduce incompatibilities) # This is for x86_64. For other architectures we have separate toolchains. -if (ARCH_AMD64 AND NOT_UNBUNDLED) +if (ARCH_AMD64 AND NOT_UNBUNDLED AND NOT CMAKE_CROSSCOMPILING) set(CMAKE_C_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) set(CMAKE_CXX_STANDARD_INCLUDE_DIRECTORIES ${ClickHouse_SOURCE_DIR}/contrib/libc-headers/x86_64-linux-gnu ${ClickHouse_SOURCE_DIR}/contrib/libc-headers) endif () @@ -37,8 +39,10 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) if (NOT OS_ANDROID) - # Our compatibility layer doesn't build under Android, many errors in musl. - add_subdirectory(base/glibc-compatibility) + if (NOT USE_MUSL) + # Our compatibility layer doesn't build under Android, many errors in musl. 
+ add_subdirectory(base/glibc-compatibility) + endif () add_subdirectory(base/harmful) endif () diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake new file mode 100644 index 00000000000..1ccbd3ee0da --- /dev/null +++ b/cmake/linux/toolchain-riscv64.cmake @@ -0,0 +1,32 @@ +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "riscv64") +set (CMAKE_C_COMPILER_TARGET "riscv64-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "riscv64-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "riscv64-linux-gnu") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-riscv64") + +set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}") + +find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") +find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9") + +set (CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "" FORCE) +set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}" CACHE FILEPATH "" FORCE) + +set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") + +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) + +set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") +set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") + +set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) diff --git a/cmake/linux/toolchain-x86_64-musl.cmake b/cmake/linux/toolchain-x86_64-musl.cmake new file mode 100644 index 00000000000..0406b5de0ba --- /dev/null +++ b/cmake/linux/toolchain-x86_64-musl.cmake @@ -0,0 +1,35 @@ +set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +set (CMAKE_SYSTEM_NAME "Linux") +set (CMAKE_SYSTEM_PROCESSOR "x86_64") +set (CMAKE_C_COMPILER_TARGET "x86_64-linux-musl") +set (CMAKE_CXX_COMPILER_TARGET "x86_64-linux-musl") +set (CMAKE_ASM_COMPILER_TARGET "x86_64-linux-musl") + +set (TOOLCHAIN_PATH "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/linux-x86_64-musl") + +set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}") + +find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-13" "llvm-ar-12" "llvm-ar-11" "llvm-ar-10" "llvm-ar-9" "llvm-ar-8") +find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-13" "llvm-ranlib-12" "llvm-ranlib-11" "llvm-ranlib-10" "llvm-ranlib-9") + +set (CMAKE_AR "${LLVM_AR_PATH}" CACHE FILEPATH "" FORCE) +set (CMAKE_RANLIB "${LLVM_RANLIB_PATH}" CACHE FILEPATH "" FORCE) + +set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") +set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}") + +set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE) + +set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld") +set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld") + +set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE) +set 
(HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE) + +set (USE_MUSL 1) +add_definitions(-DUSE_MUSL=1) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 5ff85fa85c2..676654452de 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -1,16 +1,5 @@ # Third-party libraries may have substandard code. -# Put all targets defined here and in added subfolders under "contrib/" folder in GUI-based IDEs by default. -# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they will -# appear not in "contrib/" as originally planned here. -get_filename_component (_current_dir_name "${CMAKE_CURRENT_LIST_DIR}" NAME) -if (CMAKE_FOLDER) - set (CMAKE_FOLDER "${CMAKE_FOLDER}/${_current_dir_name}") -else () - set (CMAKE_FOLDER "${_current_dir_name}") -endif () -unset (_current_dir_name) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") @@ -49,6 +38,14 @@ add_subdirectory (replxx-cmake) add_subdirectory (unixodbc-cmake) add_subdirectory (nanodbc-cmake) +if (USE_INTERNAL_CAPNP_LIBRARY AND NOT MISSING_INTERNAL_CAPNP_LIBRARY) + add_subdirectory(capnproto-cmake) +endif () + +if (ENABLE_FUZZING) + add_subdirectory (libprotobuf-mutator-cmake) +endif() + if (USE_YAML_CPP) add_subdirectory (yaml-cpp-cmake) endif() @@ -348,3 +345,76 @@ endif() if (USE_S2_GEOMETRY) add_subdirectory(s2geometry-cmake) endif() + +# Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. +# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear +# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, +# instead of controlling it via CMAKE_FOLDER. + +function (ensure_target_rooted_in _target _folder) + # Skip INTERFACE library targets, since FOLDER property is not available for them. + get_target_property (_target_type "${_target}" TYPE) + if (_target_type STREQUAL "INTERFACE_LIBRARY") + return () + endif () + + # Read the original FOLDER property value, if any. + get_target_property (_folder_prop "${_target}" FOLDER) + + # Normalize that value, so we avoid possible repetitions in folder names. + + if (NOT _folder_prop) + set (_folder_prop "") + endif () + + if (CMAKE_FOLDER AND _folder_prop MATCHES "^${CMAKE_FOLDER}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder AND _folder_prop MATCHES "^${_folder}/(.*)\$") + set (_folder_prop "${CMAKE_MATCH_1}") + endif () + + if (_folder) + set (_folder_prop "${_folder}/${_folder_prop}") + endif () + + if (CMAKE_FOLDER) + set (_folder_prop "${CMAKE_FOLDER}/${_folder_prop}") + endif () + + # Set the updated FOLDER property value back. 
+ set_target_properties ("${_target}" PROPERTIES FOLDER "${_folder_prop}") +endfunction () + +function (ensure_own_targets_are_rooted_in _dir _folder) + get_directory_property (_targets DIRECTORY "${_dir}" BUILDSYSTEM_TARGETS) + foreach (_target IN LISTS _targets) + ensure_target_rooted_in ("${_target}" "${_folder}") + endforeach () +endfunction () + +function (ensure_all_targets_are_rooted_in _dir _folder) + ensure_own_targets_are_rooted_in ("${_dir}" "${_folder}") + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_folder}") + endforeach () +endfunction () + +function (organize_ide_folders_2_level _dir) + get_filename_component (_dir_name "${_dir}" NAME) + ensure_own_targets_are_rooted_in ("${_dir}" "${_dir_name}") + + # Note, that we respect only first two levels of nesting, we don't want to + # reorganize target folders further within each third-party dir. + + get_property (_sub_dirs DIRECTORY "${_dir}" PROPERTY SUBDIRECTORIES) + foreach (_sub_dir IN LISTS _sub_dirs) + get_filename_component (_sub_dir_name "${_sub_dir}" NAME) + ensure_all_targets_are_rooted_in ("${_sub_dir}" "${_dir_name}/${_sub_dir_name}") + endforeach () +endfunction () + +organize_ide_folders_2_level ("${CMAKE_CURRENT_LIST_DIR}") diff --git a/contrib/boringssl b/contrib/boringssl index a6a2e2ab3e4..c1e01a441d6 160000 --- a/contrib/boringssl +++ b/contrib/boringssl @@ -1 +1 @@ -Subproject commit a6a2e2ab3e44d97ce98e51c558e989f211de7eb3 +Subproject commit c1e01a441d6db234f4f12e63a7657d1f9e6db9c1 diff --git a/contrib/boringssl-cmake/CMakeLists.txt b/contrib/boringssl-cmake/CMakeLists.txt index 4502d6e9d42..474e32f3b91 100644 --- a/contrib/boringssl-cmake/CMakeLists.txt +++ b/contrib/boringssl-cmake/CMakeLists.txt @@ -4,7 +4,7 @@ # This file is created by generate_build_files.py and edited accordingly. -cmake_minimum_required(VERSION 3.0) +cmake_minimum_required(VERSION 3.5) project(BoringSSL LANGUAGES C CXX) @@ -20,12 +20,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") endif() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common") - if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") - else() - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") - endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common -std=c11") endif() # pthread_rwlock_t requires a feature flag. @@ -55,7 +50,7 @@ add_definitions(-DBORINGSSL_IMPLEMENTATION) # builds. if(NOT OPENSSL_NO_ASM AND CMAKE_OSX_ARCHITECTURES) list(LENGTH CMAKE_OSX_ARCHITECTURES NUM_ARCHES) - if(NOT ${NUM_ARCHES} EQUAL 1) + if(NOT NUM_ARCHES EQUAL 1) message(FATAL_ERROR "Universal binaries not supported.") endif() list(GET CMAKE_OSX_ARCHITECTURES 0 CMAKE_SYSTEM_PROCESSOR) @@ -78,7 +73,13 @@ elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386") - set(ARCH "x86") + # cmake uses `uname -p` to set the system processor, but Solaris + # systems support multiple architectures. 
+ if((${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") AND CMAKE_SIZEOF_VOID_P EQUAL 8) + set(ARCH "x86_64") + else() + set(ARCH "x86") + endif() elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686") set(ARCH "x86") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") @@ -289,6 +290,21 @@ set( mac-x86_64/crypto/test/trampoline-x86_64.S ) +set( + CRYPTO_win_aarch64_SOURCES + + win-aarch64/crypto/chacha/chacha-armv8.S + win-aarch64/crypto/fipsmodule/aesv8-armx64.S + win-aarch64/crypto/fipsmodule/armv8-mont.S + win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S + win-aarch64/crypto/fipsmodule/ghashv8-armx64.S + win-aarch64/crypto/fipsmodule/sha1-armv8.S + win-aarch64/crypto/fipsmodule/sha256-armv8.S + win-aarch64/crypto/fipsmodule/sha512-armv8.S + win-aarch64/crypto/fipsmodule/vpaes-armv8.S + win-aarch64/crypto/test/trampoline-armv8.S +) + set( CRYPTO_win_x86_SOURCES @@ -331,9 +347,9 @@ set( win-x86_64/crypto/test/trampoline-x86_64.asm ) -if(APPLE AND ${ARCH} STREQUAL "aarch64") +if(APPLE AND ARCH STREQUAL "aarch64") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES}) -elseif(APPLE AND ${ARCH} STREQUAL "arm") +elseif(APPLE AND ARCH STREQUAL "arm") set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES}) elseif(APPLE) set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES}) @@ -360,6 +376,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_object.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_octet.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_print.c" + "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_strnid.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_time.c" "${BORINGSSL_SOURCE_DIR}/crypto/asn1/a_type.c" @@ -389,6 +406,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/bio/printf.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket.c" "${BORINGSSL_SOURCE_DIR}/crypto/bio/socket_helper.c" + "${BORINGSSL_SOURCE_DIR}/crypto/blake2/blake2.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/bn_asn1.c" "${BORINGSSL_SOURCE_DIR}/crypto/bn_extra/convert.c" "${BORINGSSL_SOURCE_DIR}/crypto/buf/buf.c" @@ -413,6 +431,7 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/conf/conf.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-fuchsia.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-linux.c" + "${BORINGSSL_SOURCE_DIR}/crypto/cpu-aarch64-win.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm-linux.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-arm.c" "${BORINGSSL_SOURCE_DIR}/crypto/cpu-intel.c" @@ -452,7 +471,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/ex_data.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/bcm.c" "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/fips_shared_support.c" - "${BORINGSSL_SOURCE_DIR}/crypto/fipsmodule/is_fips.c" "${BORINGSSL_SOURCE_DIR}/crypto/hkdf/hkdf.c" "${BORINGSSL_SOURCE_DIR}/crypto/hpke/hpke.c" "${BORINGSSL_SOURCE_DIR}/crypto/hrss/hrss.c" @@ -499,13 +517,13 @@ add_library( "${BORINGSSL_SOURCE_DIR}/crypto/trust_token/voprf.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_digest.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_sign.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_strex.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/a_verify.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/algorithm.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/asn1_gen.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_dir.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/by_file.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/i2d_pr.c" + "${BORINGSSL_SOURCE_DIR}/crypto/x509/name_print.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/rsa_pss.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_crl.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/t_req.c" @@ -519,7 +537,6 @@ add_library( 
"${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_ext.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_lu.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_obj.c" - "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_r2x.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_req.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_set.c" "${BORINGSSL_SOURCE_DIR}/crypto/x509/x509_trs.c" @@ -589,6 +606,8 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/d1_srtp.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_method.cc" "${BORINGSSL_SOURCE_DIR}/ssl/dtls_record.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/encrypted_client_hello.cc" + "${BORINGSSL_SOURCE_DIR}/ssl/extensions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handoff.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake.cc" "${BORINGSSL_SOURCE_DIR}/ssl/handshake_client.cc" @@ -611,7 +630,6 @@ add_library( "${BORINGSSL_SOURCE_DIR}/ssl/ssl_versions.cc" "${BORINGSSL_SOURCE_DIR}/ssl/ssl_x509.cc" "${BORINGSSL_SOURCE_DIR}/ssl/t1_enc.cc" - "${BORINGSSL_SOURCE_DIR}/ssl/t1_lib.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_both.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_client.cc" "${BORINGSSL_SOURCE_DIR}/ssl/tls13_enc.cc" @@ -633,6 +651,7 @@ add_executable( "${BORINGSSL_SOURCE_DIR}/tool/digest.cc" "${BORINGSSL_SOURCE_DIR}/tool/fd.cc" "${BORINGSSL_SOURCE_DIR}/tool/file.cc" + "${BORINGSSL_SOURCE_DIR}/tool/generate_ech.cc" "${BORINGSSL_SOURCE_DIR}/tool/generate_ed25519.cc" "${BORINGSSL_SOURCE_DIR}/tool/genrsa.cc" "${BORINGSSL_SOURCE_DIR}/tool/pkcs12.cc" diff --git a/contrib/capnproto b/contrib/capnproto index a00ccd91b37..c8189ec3c27 160000 --- a/contrib/capnproto +++ b/contrib/capnproto @@ -1 +1 @@ -Subproject commit a00ccd91b3746ef2ab51d40fe3265829949d1ace +Subproject commit c8189ec3c27dacbd4a3288e682473010e377f593 diff --git a/contrib/capnproto-cmake/CMakeLists.txt b/contrib/capnproto-cmake/CMakeLists.txt index 9f6e076cc7d..05446355535 100644 --- a/contrib/capnproto-cmake/CMakeLists.txt +++ b/contrib/capnproto-cmake/CMakeLists.txt @@ -45,6 +45,7 @@ set (CAPNP_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/serialize-packed.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/stream.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-loader.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/dynamic.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/stringify.c++" @@ -63,6 +64,7 @@ set (CAPNPC_SRCS "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/lexer.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/grammar.capnp.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/parser.c++" + "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/generics.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/node-translator.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/compiler/compiler.c++" "${CAPNPROTO_SOURCE_DIR}/capnp/schema-parser.c++" diff --git a/contrib/fastops b/contrib/fastops index 012b777df9e..1460583af7d 160000 --- a/contrib/fastops +++ b/contrib/fastops @@ -1 +1 @@ -Subproject commit 012b777df9e2d145a24800a6c8c3d4a0249bb09e +Subproject commit 1460583af7d13c0e980ce46aec8ee9400314669a diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h index 6a03a231a0e..0aa4033f859 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h @@ -18,8 +18,10 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. 
*/ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE_MEMALIGN + #define JEMALLOC_OVERRIDE_VALLOC +#endif /* * At least Linux omits the "const" in: diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h index 8506237729d..2e35e7b6249 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h @@ -1,6 +1,6 @@ // OSX does not have this for system alloc functions, so you will get // "exception specification in declaration" error. -#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) || defined(__FreeBSD__) || defined(USE_MUSL) # undef JEMALLOC_NOTHROW # define JEMALLOC_NOTHROW diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index d5cf0e719ef..44ff2d9fad1 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -13,12 +13,14 @@ * Define overrides for non-standard allocator-related functions if they are * present on the system. */ -#define JEMALLOC_OVERRIDE___LIBC_CALLOC -#define JEMALLOC_OVERRIDE___LIBC_FREE -#define JEMALLOC_OVERRIDE___LIBC_MALLOC -#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN -#define JEMALLOC_OVERRIDE___LIBC_REALLOC -#define JEMALLOC_OVERRIDE___LIBC_VALLOC +#if !defined(USE_MUSL) + #define JEMALLOC_OVERRIDE___LIBC_CALLOC + #define JEMALLOC_OVERRIDE___LIBC_FREE + #define JEMALLOC_OVERRIDE___LIBC_MALLOC + #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN + #define JEMALLOC_OVERRIDE___LIBC_REALLOC + #define JEMALLOC_OVERRIDE___LIBC_VALLOC +#endif /* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 0cfb4191619..2ec6dbff1a1 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -47,6 +47,7 @@ set(SRCS ) add_library(cxx ${SRCS}) +set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PUBLIC $) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) @@ -56,6 +57,10 @@ if (USE_UNWIND) target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) endif () +if (USE_MUSL) + target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) +endif () + # Override the deduced attribute support that causes error. if (OS_DARWIN AND COMPILER_GCC) add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0bb5d663633..425111d9b26 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -22,6 +22,7 @@ set(SRCS ) add_library(cxxabi ${SRCS}) +set_target_properties(cxxabi PROPERTIES FOLDER "contrib/libcxxabi-cmake") # Third party library may have substandard code. 
target_compile_options(cxxabi PRIVATE -w) diff --git a/contrib/libprotobuf-mutator b/contrib/libprotobuf-mutator new file mode 160000 index 00000000000..ffd86a32874 --- /dev/null +++ b/contrib/libprotobuf-mutator @@ -0,0 +1 @@ +Subproject commit ffd86a32874e5c08a143019aad1aaf0907294c9f diff --git a/contrib/libprotobuf-mutator-cmake/CMakeLists.txt b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt new file mode 100644 index 00000000000..978b1e732ba --- /dev/null +++ b/contrib/libprotobuf-mutator-cmake/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libprotobuf-mutator) + +add_library(protobuf-mutator + ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_macro.cc + ${LIBRARY_DIR}/src/libfuzzer/libfuzzer_mutator.cc + ${LIBRARY_DIR}/src/binary_format.cc + ${LIBRARY_DIR}/src/mutator.cc + ${LIBRARY_DIR}/src/text_format.cc + ${LIBRARY_DIR}/src/utf8_fix.cc) + +target_include_directories(protobuf-mutator BEFORE PRIVATE "${LIBRARY_DIR}") +target_include_directories(protobuf-mutator BEFORE PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") + +target_link_libraries(protobuf-mutator ${Protobuf_LIBRARY}) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 1a9f5e50abd..155853a0bca 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -39,6 +39,7 @@ set(LIBUNWIND_SOURCES ${LIBUNWIND_ASM_SOURCES}) add_library(unwind ${LIBUNWIND_SOURCES}) +set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) diff --git a/contrib/openldap-cmake/linux_x86_64/include/portable.h b/contrib/openldap-cmake/linux_x86_64/include/portable.h index 2924b6713a4..ab7052bda91 100644 --- a/contrib/openldap-cmake/linux_x86_64/include/portable.h +++ b/contrib/openldap-cmake/linux_x86_64/include/portable.h @@ -98,7 +98,9 @@ #define HAVE_BCOPY 1 /* Define to 1 if you have the header file. */ -#define HAVE_BITS_TYPES_H 1 +#if !defined(USE_MUSL) + #define HAVE_BITS_TYPES_H 1 +#endif /* Define to 1 if you have the `chroot' function. 
*/ #define HAVE_CHROOT 1 diff --git a/contrib/replxx b/contrib/replxx index f97765df14f..b0c266c2d8a 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit f97765df14f4a6236d69b8f14b53ef2051ebd95a +Subproject commit b0c266c2d8a835784181e17292b421848c78c6b8 diff --git a/contrib/sysroot b/contrib/sysroot index 002415524b5..6172893931e 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit 002415524b5d14124bb8a61a3ce7ac65774f5479 +Subproject commit 6172893931e19b028f9cabb7095a44361be863df diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index f4b99603554..2e37522f1b4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -189,7 +189,7 @@ function clone_submodules ) git submodule sync - git submodule update --depth 1 --init --recursive "${SUBMODULES_TO_UPDATE[@]}" + git submodule update --depth 1 --init "${SUBMODULES_TO_UPDATE[@]}" git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd diff --git a/docker/test/integration/kerberized_hadoop/Dockerfile b/docker/test/integration/kerberized_hadoop/Dockerfile index 11da590f901..00944cbfc00 100644 --- a/docker/test/integration/kerberized_hadoop/Dockerfile +++ b/docker/test/integration/kerberized_hadoop/Dockerfile @@ -1,16 +1,22 @@ # docker build -t clickhouse/kerberized-hadoop . FROM sequenceiq/hadoop-docker:2.7.0 -RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo -RUN sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +RUN sed -i -e 's/^\#baseurl/baseurl/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's/^mirrorlist/#mirrorlist/' /etc/yum.repos.d/CentOS-Base.repo && \ + sed -i -e 's#http://mirror.centos.org/#http://vault.centos.org/#' /etc/yum.repos.d/CentOS-Base.repo + +# https://community.letsencrypt.org/t/rhel-centos-6-openssl-client-compatibility-after-dst-root-ca-x3-expiration/161032/81 +RUN sed -i s/xMDkzMDE0MDExNVow/0MDkzMDE4MTQwM1ow/ /etc/pki/tls/certs/ca-bundle.crt + RUN yum clean all && \ rpm --rebuilddb && \ yum -y update && \ yum -y install yum-plugin-ovl && \ yum --quiet -y install krb5-workstation.x86_64 + RUN cd /tmp && \ - curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ + curl http://archive.apache.org/dist/commons/daemon/source/commons-daemon-1.0.15-src.tar.gz -o commons-daemon-1.0.15-src.tar.gz && \ tar xzf commons-daemon-1.0.15-src.tar.gz && \ cd commons-daemon-1.0.15-src/src/native/unix && \ ./configure && \ diff --git a/docker/test/pvs/Dockerfile b/docker/test/pvs/Dockerfile index f4675d35819..77cbd910922 100644 --- a/docker/test/pvs/Dockerfile +++ b/docker/test/pvs/Dockerfile @@ -37,7 +37,9 @@ RUN set -x \ || echo "WARNING: Some file was just downloaded from the internet without any validation and we are installing it into the system"; } \ && dpkg -i "${PKG_VERSION}.deb" -CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ +ENV CCACHE_DIR=/test_output/ccache + +CMD echo "Running PVS version $PKG_VERSION" && mkdir -p $CCACHE_DIR && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ && cmake . 
-D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang\+\+-13 \ && ninja re2_st clickhouse_grpc_protos \ && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \ diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 424bfe71b15..0118e6df764 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# yaml check is not the best one + cd /ClickHouse/utils/check-style || echo -e "failure\tRepo not found" > /test_output/check_status.tsv ./check-style -n |& tee /test_output/style_output.txt ./check-typos |& tee /test_output/typos_output.txt diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 7cba682e772..eaea4cc69ed 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -47,13 +47,17 @@ then fi URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" +echo echo "Will download ${URL}" +echo curl -O "${URL}" && chmod a+x clickhouse && +echo echo "Successfully downloaded the ClickHouse binary, you can run it as: ./clickhouse" if [ "${OS}" = "Linux" ] then + echo echo "You can also install it: sudo ./clickhouse install" fi diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index d188b4bb147..0cd69312bb6 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -3,9 +3,15 @@ toc_priority: 65 toc_title: Build on Mac OS X --- +# You don't have to build ClickHouse + +You can install ClickHouse as follows: https://clickhouse.com/#quick-start +Choose Mac x86 or M1. + # How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x} -Build should work on x86_64 (Intel) and arm64 (Apple Silicon) based macOS 10.15 (Catalina) and higher with recent Xcode's native AppleClang, or Homebrew's vanilla Clang or GCC compilers. +Build should work on x86_64 (Intel) and arm64 (Apple Silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang. +It is always recommended to use `clang` compiler. It is possible to use XCode's `AppleClang` or `gcc` but it's strongly discouraged. ## Install Homebrew {#install-homebrew} @@ -45,18 +51,6 @@ git clone --recursive git@github.com:ClickHouse/ClickHouse.git ## Build ClickHouse {#build-clickhouse} -To build using Xcode's native AppleClang compiler: - -``` bash -cd ClickHouse -rm -rf build -mkdir build -cd build -cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -cmake --build . --config RelWithDebInfo -cd .. -``` - To build using Homebrew's vanilla Clang compiler: ``` bash @@ -69,7 +63,19 @@ cmake --build . --config RelWithDebInfo cd .. ``` -To build using Homebrew's vanilla GCC compiler: +To build using Xcode's native AppleClang compiler (this option is strongly not recommended; use the option above): + +``` bash +cd ClickHouse +rm -rf build +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. +cmake --build . --config RelWithDebInfo +cd .. 
+``` + +To build using Homebrew's vanilla GCC compiler (this option is absolutely not recommended, I'm wondering why do we ever have it): ``` bash cd ClickHouse diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index be6a08d397c..024ce27d60d 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -37,7 +37,7 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive git@github.com:your_github_username/ClickHouse.git + git clone git@github.com:your_github_username/ClickHouse.git cd ClickHouse Note: please, substitute *your_github_username* with what is appropriate! @@ -79,7 +79,7 @@ After successfully running this command you will be able to pull updates from th Working with submodules in git could be painful. Next commands will help to manage it: - # ! each command accepts --recursive + # ! each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -92,16 +92,16 @@ Working with submodules in git could be painful. Next commands will help to mana The next commands would help you to reset all submodules to the initial state (!WARNING! - any changes inside will be deleted): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index b8a9bb19a05..52876674475 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -36,7 +36,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name Create a table in ClickHouse which allows to read data from MongoDB collection: -``` text +``` sql CREATE TABLE mongo_table ( key UInt64, @@ -46,7 +46,7 @@ CREATE TABLE mongo_table To read from an SSL secured MongoDB server: -``` text +``` sql CREATE TABLE mongo_table_ssl ( key UInt64, diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 4f473279067..aeaf39e28cb 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -320,7 +320,7 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 - `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with strings. Can be used for optimization of `equals`, `like` and `in` expressions. 
+ Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions. - `n` — ngram size, - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well). @@ -337,7 +337,9 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`. - For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. + For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. + + The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem). Example of index creation for `Map` data type @@ -346,9 +348,6 @@ INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1 INDEX map_key_index mapValues(map_column) TYPE bloom_filter GRANULARITY 1 ``` - The following functions can use it: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md). - - ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 6593a5dc17f..708dab6fb7d 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -5,7 +5,7 @@ toc_title: Distributed # Distributed Table Engine {#distributed} -Tables with Distributed engine do not store any data by their own, but allow distributed query processing on multiple servers. +Tables with Distributed engine do not store any data of their own, but allow distributed query processing on multiple servers. Reading is automatically parallelized. During a read, the table indexes on remote servers are used, if there are any. The Distributed engine accepts parameters: @@ -167,20 +167,20 @@ If this parameter is set to `true`, the write operation selects the first health If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data. 
-To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19). +To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weights` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19). The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID). -A simple reminder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. +A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables. -SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer the old data to it. You can write new data with a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. +SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you do not have to transfer old data into it. Instead, you can write new data to it by using a heavier weight – the data will be distributed slightly unevenly, but queries will work correctly and efficiently. You should be concerned about the sharding scheme in the following cases: - Queries are used that require joining data (IN or JOIN) by a specific key. 
If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient. - A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries. -Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. +Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. 
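As a minimal sketch of the sharding-key usage described above (the cluster name `logs_cluster`, the database `db`, the table names, and the column set are hypothetical placeholders, not taken from this patch):

``` sql
-- Hypothetical local table that exists on every shard of the cluster.
CREATE TABLE db.hits_local
(
    UserID UInt64,
    EventDate Date,
    URL String
)
ENGINE = MergeTree
ORDER BY (EventDate, UserID);

-- Distributed table that routes each row by the remainder of intHash64(UserID)
-- divided by the total shard weight, so all rows of one user land on one shard.
CREATE TABLE db.hits_all AS db.hits_local
ENGINE = Distributed(logs_cluster, db, hits_local, intHash64(UserID));
```

With a key like this, `IN` and `JOIN` by `UserID` can often be run as local rather than GLOBAL operations, as noted above.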
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used. diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 508cd51e9f8..4a97ab6589d 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -27,7 +27,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun {% include 'install/deb.sh' %} ``` -If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). +You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/). diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 70b7d59b037..eaf7a96ce42 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -127,6 +127,9 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--secure` – If specified, will connect to server over secure connection. - `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). +- `--hardware-utilization` — Print hardware utilization information in progress bar. +- `--print-profile-events` – Print `ProfileEvents` packets. +- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet). Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled). 
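As a brief illustration of the `--param_` option listed above (the table name `hits` and the parameter name `uid` are hypothetical), a query can reference client-supplied values with the `{name:Type}` substitution syntax:

``` sql
-- The value is supplied by the client, e.g. clickhouse-client --param_uid=42 (hypothetical invocation).
SELECT count()
FROM hits
WHERE UserID = {uid:UInt64};
```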
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 700c804ea66..6dd61ac6ad4 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -22,10 +22,13 @@ toc_title: Adopters | ApiRoad | API marketplace | Analytics | — | — | [Blog post, Nov 2018, Mar 2020](https://pixeljets.com/blog/clickhouse-vs-elasticsearch/) | | Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | | ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | | Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | | Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) | +| Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | | Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | +| BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | | Bloomberg | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | | Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | | Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | @@ -33,6 +36,7 @@ toc_title: Adopters | CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | | CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | | Checkly | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/tim_nolet/status/1445810665743081474?s=20) | +| ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | | Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | | Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | | Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | @@ -48,6 +52,7 @@ toc_title: Adopters | Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | | Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | | Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 
2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | | Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | | eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | | Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | @@ -57,9 +62,11 @@ toc_title: Adopters | FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | | Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | +| Gigapipe | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) | | Glaber | Monitoring | Main product | — | — | [Website](https://glaber.io/) | | GraphCDN | CDN | Traffic Analytics | — | — | [Blog Post in English, August 2021](https://altinity.com/blog/delivering-insight-on-graphql-apis-with-clickhouse-at-graphcdn/) | | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| Hydrolix | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) | | ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.com/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infobaleen | AI markting tool | Analytics | — | — | [Official site](https://infobaleen.com) | @@ -71,9 +78,11 @@ toc_title: Adopters | Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | | Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | | Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | | Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | | Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.com/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | +| KGK Global | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) | | Lawrence 
Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | | LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | @@ -88,7 +97,10 @@ toc_title: Adopters | Netskope | Network Security | — | — | — | [Job advertisement, March 2021](https://www.mendeley.com/careers/job/senior-software-developer-backend-developer-1346348) | | NIC Labs | Network Monitoring | RaTA-DNS | — | — | [Blog post, March 2021](https://niclabs.cl/ratadns/2021/03/Clickhouse) | | NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | +| Noction | Network Technology | Main Product | — | — | [Official Website](https://www.noction.com/news/irp-3-11-remote-triggered-blackholing-capability) | Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | +| Ok.ru | Social Network | — | 72 servers | 810 TB compressed, 50bn rows/day, 1.5 TB/day | [SmartData conference, Oct 2021](https://assets.ctfassets.net/oxjq45e8ilak/4JPHkbJenLgZhBGGyyonFP/57472ec6987003ec4078d0941740703b/____________________ClickHouse_______________________.pdf) | +| Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, Oct 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | | OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | | OZON | E-commerce | — | — | — | [Official website](https://job.ozon.ru/vacancy/razrabotchik-clickhouse-ekspluatatsiya-40991870/) | | Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | @@ -103,6 +115,7 @@ toc_title: Adopters | Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | | Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | | Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Replica | Urban Planning | Analytics | — | — | [Job advertisement](https://boards.greenhouse.io/replica/jobs/5547732002?gh_jid=5547732002) | | Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | | Rollbar | Software Development | Main Product | — | — | [Official Website](https://www.rollbar.com) | | Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | @@ -116,6 +129,7 @@ toc_title: Adopters | Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | | seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | | 
SGK | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | | Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | | Sipfront | Software Development | Analytics | — | — | [Tweet, October 2021](https://twitter.com/andreasgranig/status/1446404332337913895?s=20) | | SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | @@ -129,12 +143,14 @@ toc_title: Adopters | Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | +| Tesla | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | +| Timeflow | Software | Analytics | — | — | [Blog](https://timeflow.systems/why-we-moved-from-druid-to-clickhouse/ ) | | Tinybird | Real-time Data Products | Data processing | — | — | [Official website](https://www.tinybird.co/) | -| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| Traffic Stars | AD network | — | 300 servers in Europe/US | 1.8 PiB, 700 000 insert rps (as of 2021) | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | | Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/uber.pdf) | | UTMSTAT | Analytics | Main product | — | — | [Blog post, June 2020](https://vc.ru/tribuna/133956-striming-dannyh-iz-servisa-skvoznoy-analitiki-v-clickhouse) | | VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| VMWare | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | +| VMware | Cloud | VeloCloud, SDN | — | — | [Product documentation](https://docs.vmware.com/en/vRealize-Operations-Manager/8.3/com.vmware.vcom.metrics.doc/GUID-A9AD72E1-C948-4CA2-971B-919385AB3CA8.html) | | Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | | Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | | Wildberries | E-commerce | | — | — | [Official website](https://it.wildberries.ru/) | @@ -148,23 +164,11 @@ toc_title: Adopters | Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | | Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 
120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) | | Yotascale | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) | +| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | | ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | | МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | | Цифровой Рабочий | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) | -| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | -| Tesla | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) | -| KGK Global | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) | -| BiliBili | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) | -| Gigapipe | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) | -| Hydrolix | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) | -| Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | -| SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | -| ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | -| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | -| Beeline | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) | -| Ecommpay | Payment Processing | Logs | — | — | [Video, Nov 2019](https://www.youtube.com/watch?v=d3GdZTOWGLk) | -| Omnicomm | Transportation Monitoring | — | — | — | [Facebook post, Oct 2021](https://www.facebook.com/OmnicommTeam/posts/2824479777774500) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 19567ec29fb..cdf49678570 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -643,7 +643,7 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa ## max_concurrent_queries {#max-concurrent-queries} -The maximum number of simultaneously processed queries related to MergeTree table. 
Queries may be limited by other settings: [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). +The maximum number of simultaneously processed queries related to MergeTree table. Queries may be limited by other settings: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). !!! info "Note" These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. @@ -659,6 +659,21 @@ Possible values: 100 ``` +## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} + +The maximum number of simultaneously processed queries related to MergeTree table per user. + +Possible values: + +- Positive integer. +- 0 — Disabled. + +**Example** + +``` xml +5 +``` + ## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users} Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries. diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index d7a73de5046..bdcc13340f0 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -10,7 +10,7 @@ Columns: - `[]` — All users share the same quota. - `['user_name']` — Connections with the same user name share the same quota. - `['ip_address']` — Connections from the same IP share the same quota. - - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. + - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds. diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 5cbbe71b3e0..54c66bb8d13 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -70,7 +70,7 @@ For HDD, enable the write cache. ## File System {#file-system} -Ext4 is the most reliable option. Set the mount options `noatime, nobarrier`. +Ext4 is the most reliable option. Set the mount options `noatime`. XFS is also suitable, but it hasn’t been as thoroughly tested with ClickHouse. Most other file systems should also work fine. File systems with delayed allocation work better. 
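The XML tags in the `max_concurrent_queries` and `max_concurrent_queries_for_user` examples above appear to have been stripped, leaving only the bare values `100` and `5`. Reconstructed from the setting names and values documented above, the intended `config.xml` snippets presumably look like this:

``` xml
<max_concurrent_queries>100</max_concurrent_queries>
<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user>
```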
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index f525ea64aa2..40f9db12315 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -53,15 +53,17 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings - [flat](#flat) - [hashed](#dicts-external_dicts_dict_layout-hashed) - [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed) -- [cache](#cache) -- [ssd_cache](#ssd-cache) -- [direct](#direct) -- [range_hashed](#range-hashed) - [complex_key_hashed](#complex-key-hashed) +- [complex_key_sparse_hashed](#complex-key-sparse-hashed) +- [hashed_array](#dicts-external_dicts_dict_layout-hashed-array) +- [complex_key_hashed_array](#complex-key-hashed-array) +- [range_hashed](#range-hashed) - [complex_key_range_hashed](#complex-key-range-hashed) +- [cache](#cache) - [complex_key_cache](#complex-key-cache) - [ssd_cache](#ssd-cache) -- [ssd_complex_key_cache](#complex-key-ssd-cache) +- [complex_key_ssd_cache](#complex-key-ssd-cache) +- [direct](#direct) - [complex_key_direct](#complex-key-direct) - [ip_trie](#ip-trie) @@ -151,10 +153,69 @@ Configuration example: ``` +or + ``` sql LAYOUT(COMPLEX_KEY_HASHED()) ``` +### complex_key_sparse_hashed {#complex-key-sparse-hashed} + +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed). + +Configuration example: + +``` xml + + + +``` + +or + +``` sql +LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) +``` + +### hashed_array {#dicts-external_dicts_dict_layout-hashed-array} + +The dictionary is completely stored in memory. Each attribute is stored in an array. The key attribute is stored in the form of a hashed table where value is an index in the attributes array. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items. + +All types of sources are supported. When updating, data (from a file or from a table) is read in its entirety. + +Configuration example: + +``` xml + + + + +``` + +or + +``` sql +LAYOUT(HASHED_ARRAY()) +``` + +### complex_key_hashed_array {#complex-key-hashed-array} + +This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Similar to [hashed_array](#dicts-external_dicts_dict_layout-hashed-array). + +Configuration example: + +``` xml + + + +``` + +or + +``` sql +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +``` + ### range_hashed {#range-hashed} The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values. @@ -302,8 +363,9 @@ When searching for a dictionary, the cache is searched first. For each block of If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. -For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. 
If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired, and it is re-requested the next time it needs to be used this behaviour can be configured with setting `allow_read_expired_keys`. -This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the `system.dictionaries` table. +For cache dictionaries, the expiration [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. + +This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. If setting `allow_read_expired_keys` is set to 1, by default 0. Then dictionary can support asynchronous updates. If a client requests keys and all of them are in cache, but some of them are expired, then dictionary will return expired keys for a client and request them asynchronously from the source. @@ -368,7 +430,7 @@ Similar to `cache`, but stores data on SSD and index in RAM. All cache dictionar 1048576 - /var/lib/clickhouse/clickhouse_dictionaries/test_dict + /var/lib/clickhouse/user_files/test_dict ``` @@ -377,7 +439,7 @@ or ``` sql LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 - PATH ./user_files/test_dict)) + PATH '/var/lib/clickhouse/user_files/test_dict')) ``` ### complex_key_ssd_cache {#complex-key-ssd-cache} @@ -491,4 +553,3 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. Data must completely fit into RAM. - diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 6864ba7705b..11c1e9ad3cf 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2427,3 +2427,39 @@ Type: [UInt32](../../sql-reference/data-types/int-uint.md). **See Also** - [shardNum()](#shard-num) function example also contains `shardCount()` function call. + +## getOSKernelVersion {#getoskernelversion} + +Returns a string with the current OS kernel version. + +**Syntax** + +``` sql +getOSKernelVersion() +``` + +**Arguments** + +- None. + +**Returned value** + +- The current OS kernel version. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Example** + +Query: + +``` sql +SELECT getOSKernelVersion(); +``` + +Result: + +``` text +┌─getOSKernelVersion()────┐ +│ Linux 4.15.0-55-generic │ +└─────────────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 718d5a977b9..eeca5209ea5 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -270,3 +270,40 @@ Result: │ [['abc','123'],['8','"hkl"']] │ └───────────────────────────────────────────────────────────────────────┘ ``` + +## ngrams {#ngrams} + +Splits the UTF-8 string into n-grams of `ngramsize` symbols. + +**Syntax** + +``` sql +ngrams(string, ngramsize) +``` + +**Arguments** + +- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). + +**Returned values** + +- Array with n-grams. + +Type: [Array](../../sql-reference/data-types/array.md)([FixedString](../../sql-reference/data-types/fixedstring.md)). + +**Example** + +Query: + +``` sql +SELECT ngrams('ClickHouse', 3); +``` + +Result: + +``` text +┌─ngrams('ClickHouse', 3)───────────────────────────┐ +│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │ +└───────────────────────────────────────────────────┘ +``` diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 7c5ea732122..df30a0fb0d2 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ Subquery is another `SELECT` query that may be specified in parenthesis inside ` When `FINAL` is specified, ClickHouse fully merges the data before returning the result and thus performs all data transformations that happen during merges for the given table engine. -It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family (except `GraphiteMergeTree`). Also supported for: +It is applicable when selecting data from tables that use the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-engine family. Also supported for: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) versions of `MergeTree` engines. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), and [MaterializedView](../../../engines/table-engines/special/materializedview.md) engines that operate over other engines, provided they were created over `MergeTree`-engine tables. diff --git a/docs/ja/development/developer-instruction.md b/docs/ja/development/developer-instruction.md index 3eafbf0481d..c95dc0e2ea4 100644 --- a/docs/ja/development/developer-instruction.md +++ b/docs/ja/development/developer-instruction.md @@ -41,7 +41,7 @@ Ubuntuでこれを行うには、コマンドラインターミナルで実行 コマンドラインターミナルで実行: - git clone --recursive git@github.com:your_github_username/ClickHouse.git + git clone git@github.com:your_github_username/ClickHouse.git cd ClickHouse 注:、代理して下さい *your_github_username* 適切なもので! @@ -83,7 +83,7 @@ ClickHouseリポジトリは以下を使用します `submodules`. That is what Gitでサブモジュールを操作するのは苦痛です。 次のコマンドは管理に役立ちます: - # ! each command accepts --recursive + # ! 
each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -96,16 +96,16 @@ Gitでサブモジュールを操作するのは苦痛です。 次のコマン 次のコマンドは、すべてのサブモジュールを初期状態にリセットするのに役立ちます(!ツづツつキツ。 -内部の変更は削除されます): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/ja/operations/tips.md b/docs/ja/operations/tips.md index 1967654af79..b5c67f0e17f 100644 --- a/docs/ja/operations/tips.md +++ b/docs/ja/operations/tips.md @@ -78,7 +78,7 @@ HDDの場合、ライトキャッシュを有効にします。 ## ファイルシス {#file-system} -Ext4は最も信頼性の高いオプションです。 マウントオプションの設定 `noatime, nobarrier`. +Ext4は最も信頼性の高いオプションです。 マウントオプションの設定 `noatime`. XFSも適していますが、ClickHouseで徹底的にテストされていません。 他のほとんどのファイルシステム仕様。 ファイルシステムの遅配ます。 diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 215a13a465e..8466c709ad1 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -40,7 +40,7 @@ ClickHouse не работает и не собирается на 32-битны Выполните в терминале: - git clone --recursive git@github.com:ClickHouse/ClickHouse.git + git clone git@github.com:ClickHouse/ClickHouse.git cd ClickHouse Замените первое вхождение слова `ClickHouse` в команде для git на имя вашего аккаунта на GitHub. @@ -82,7 +82,7 @@ ClickHouse не работает и не собирается на 32-битны Работа с сабмодулями git может быть достаточно болезненной. Следующие команды позволят содержать их в порядке: - # ! Каждая команда принимает аргумент --recursive + # ! 
Каждая команда принимает аргумент # Обновить URLs удалённого репозитория для каждого сабмодуля, используется относительно редко git submodule sync # Добавить новые сабмодули @@ -96,16 +96,16 @@ ClickHouse не работает и не собирается на 32-битны # Synchronizes submodules' remote URL with .gitmodules # Обновить URLs удалённого репозитория для каждого сабмодуля - git submodule sync --recursive + git submodule sync # Обновить существующие модули и добавить отсутствующие - git submodule update --init --recursive + git submodule update --init # Удалить все изменения в сабмодуле относительно HEAD git submodule foreach git reset --hard # Очистить игнорируемые файлы git submodule foreach git clean -xfd # Повторить последние 4 команды для каждого из сабмодулей - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md index 700377ac564..35c100b7837 100644 --- a/docs/ru/engines/table-engines/integrations/mongodb.md +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -15,7 +15,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name name1 [type1], name2 [type2], ... -) ENGINE = MongoDB(host:port, database, collection, user, password); +) ENGINE = MongoDB(host:port, database, collection, user, password [, options]); ``` **Параметры движка** @@ -30,11 +30,13 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `password` — пароль пользователя. +- `options` — MongoDB connection string options (optional parameter). + ## Примеры использования {#usage-example} -Таблица в ClickHouse для чтения данных из колекции MongoDB: +Создание таблицы в ClickHouse для чтения данных из коллекции MongoDB: -``` text +``` sql CREATE TABLE mongo_table ( key UInt64, @@ -42,6 +44,18 @@ CREATE TABLE mongo_table ) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); ``` +Чтение с сервера MongoDB, защищенного SSL: + +``` sql +CREATE TABLE mongo_table_ssl +( + key UInt64, + data String +) ENGINE = MongoDB('mongo2:27017', 'test', 'simple_table', 'testuser', 'clickhouse', 'ssl=true'); +``` + + + Запрос к таблице: ``` sql diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index bef14924d36..07e67ad1b85 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -316,17 +316,26 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### Доступные индексы {#available-types-of-indices} -- `minmax` — Хранит минимум и максимум выражения (если выражение - `tuple`, то для каждого элемента `tuple`), используя их для пропуска блоков аналогично первичному ключу. +- `minmax` — хранит минимум и максимум выражения (если выражение - [Tuple](../../../sql-reference/data-types/tuple.md), то для каждого элемента `Tuple`), используя их для пропуска блоков аналогично первичному ключу. -- `set(max_rows)` — Хранит уникальные значения выражения на блоке в количестве не более `max_rows` (если `max_rows = 0`, то ограничений нет), используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. 
+- `set(max_rows)` — хранит уникальные значения выражения на блоке в количестве не более `max_rows` (если `max_rows = 0`, то ограничений нет), используя их для пропуска блоков, оценивая выполнимость `WHERE` выражения на хранимых данных. + +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` — хранит [фильтр Блума](https://en.wikipedia.org/wiki/Bloom_filter), содержащий все N-граммы блока данных. Работает только с данными форматов [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) и [Map](../../../sql-reference/data-types/map.md) с ключами типа `String` или `fixedString`. Может быть использован для оптимизации выражений `EQUALS`, `LIKE` и `IN`. + + - `n` — размер N-граммы, + - `size_of_bloom_filter_in_bytes` — размер в байтах фильтра Блума (можно использовать большие значения, например, 256 или 512, поскольку сжатие компенсирует возможные издержки). + - `number_of_hash_functions` — количество хеш-функций, использующихся в фильтре Блума. + - `random_seed` — состояние генератора случайных чисел для хеш-функций фильтра Блума. + +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` — то же, что и`ngrambf_v1`, но хранит токены вместо N-грамм. Токены — это последовательности символов, разделенные не буквенно-цифровыми символами. - `bloom_filter([false_positive])` — [фильтр Блума](https://en.wikipedia.org/wiki/Bloom_filter) для указанных стоблцов. Необязательный параметр `false_positive` — это вероятность получения ложноположительного срабатывания. Возможные значения: (0, 1). Значение по умолчанию: 0.025. - Поддержанные типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. + Поддерживаемые типы данных: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`. - Фильтром могут пользоваться функции: [equals](../../../engines/table-engines/mergetree-family/mergetree.md), [notEquals](../../../engines/table-engines/mergetree-family/mergetree.md), [in](../../../engines/table-engines/mergetree-family/mergetree.md), [notIn](../../../engines/table-engines/mergetree-family/mergetree.md). + Фильтром могут пользоваться функции: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md#hasarr-elem). **Примеры** diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 5cb75c12155..a12773a75b0 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -29,7 +29,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.com/deb/stable/main/. -Если вы хотите использовать наиболее свежую версию, замените `stable` на `testing` (рекомендуется для тестовых окружений). +Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.com/deb/stable/main/). 
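To make the `ngrambf_v1` and `tokenbf_v1` parameters documented in the mergetree.md change above concrete, here is a minimal sketch of a table definition that uses both index types; the table and column names are hypothetical, and the parameter values follow the order described above:

``` sql
-- Hypothetical table with n-gram and token Bloom-filter skipping indices.
CREATE TABLE test_skip_indices
(
    s String,
    -- ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
    INDEX idx_ngram s TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4,
    -- tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)
    INDEX idx_token s TYPE tokenbf_v1(256, 2, 0) GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY s;

-- Such indices may help skip granules for predicates like:
SELECT count() FROM test_skip_indices WHERE s LIKE '%ClickHouse%';
```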
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 35062d5cb48..2d4ca1d3dcd 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -611,7 +611,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ## max_concurrent_queries {#max-concurrent-queries} -Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). +Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). !!! info "Примечание" Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. @@ -627,6 +627,21 @@ ClickHouse проверяет условия для `min_part_size` и `min_part 100 ``` +## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} + +Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`, для пользователя. + +Возможные значения: + +- Положительное целое число. +- 0 — выключена. + +**Пример** + +``` xml +5 +``` + ## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users} Если значение этой настройки меньше или равно текущему количеству одновременно обрабатываемых запросов, то будет сгенерировано исключение. diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md index 6a10e518817..4570ce38bad 100644 --- a/docs/ru/operations/settings/settings-users.md +++ b/docs/ru/operations/settings/settings-users.md @@ -28,7 +28,7 @@ toc_title: "Настройки пользователей" profile_name default - + default diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e6e697e5eb6..71b6ac513fc 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2684,6 +2684,43 @@ SELECT CAST(toNullable(toInt32(0)) AS Int32) as x, toTypeName(x); Значение по умолчанию: `1`. +## output_format_csv_null_representation {#output_format_csv_null_representation} + +Определяет представление `NULL` для формата выходных данных [CSV](../../interfaces/formats.md#csv). Пользователь может установить в качестве значения любую строку, например, `My NULL`. + +Значение по умолчанию: `\N`. + +**Примеры** + +Запрос: + +```sql +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Результат: + +```text +788 +\N +\N +``` + +Запрос: + +```sql +SET output_format_csv_null_representation = 'My NULL'; +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Результат: + +```text +788 +My NULL +My NULL +``` + ## output_format_tsv_null_representation {#output_format_tsv_null_representation} Определяет представление `NULL` для формата выходных данных [TSV](../../interfaces/formats.md#tabseparated). Пользователь может установить в качестве значения любую строку. 
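The `output_format_csv_null_representation` examples above query a pre-existing `csv_custom_null` table without showing how it was created. A minimal sketch that reproduces the sample output; the column type is an assumption:

``` sql
-- Assumed schema for the sample table used in the examples above.
CREATE TABLE csv_custom_null (n Nullable(UInt32)) ENGINE = Memory;
INSERT INTO csv_custom_null VALUES (788), (NULL), (NULL);

-- By default NULL is written as \N in CSV output:
SELECT * FROM csv_custom_null FORMAT CSV;

-- Override the representation for the current session:
SET output_format_csv_null_representation = 'My NULL';
SELECT * FROM csv_custom_null FORMAT CSV;
```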
diff --git a/docs/ru/operations/system-tables/quotas.md b/docs/ru/operations/system-tables/quotas.md index 3715bc89596..6c8b5a3eebf 100644 --- a/docs/ru/operations/system-tables/quotas.md +++ b/docs/ru/operations/system-tables/quotas.md @@ -11,7 +11,7 @@ - `[]` — Все пользователи используют одну и ту же квоту. - `['user_name']` — Соединения с одинаковым именем пользователя используют одну и ту же квоту. - `['ip_address']` — Соединения с одинаковым IP-адресом используют одну и ту же квоту. - - `['client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Ключ может быть явно задан клиентом. При использовании [clickhouse-client](../../interfaces/cli.md), передайте ключевое значение в параметре `--quota-key`, или используйте параметр `quota_key` файле настроек клиента. В случае использования HTTP интерфейса, используйте заголовок `X-ClickHouse-Quota`. + - `['client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Ключ может быть явно задан клиентом. При использовании [clickhouse-client](../../interfaces/cli.md), передайте ключевое значение в параметре `--quota_key`, или используйте параметр `quota_key` файле настроек клиента. В случае использования HTTP интерфейса, используйте заголовок `X-ClickHouse-Quota`. - `['user_name', 'client_key']` — Соединения с одинаковым ключом используют одну и ту же квоту. Если ключ не предоставлен клиентом, то квота отслеживается для `user_name`. - `['client_key', 'ip_address']` — Соединения с одинаковым ключом используют одну и ту же квоту. Если ключ не предоставлен клиентом, то квота отслеживается для `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Длины временных интервалов для расчета потребления ресурсов, в секундах. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 71289423035..bdf7b1d1df6 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -6,20 +6,12 @@ toc_priority: 106 Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. -Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). - **Синтаксис** ``` sql argMax(arg, val) ``` -или - -``` sql -argMax(tuple(arg, val)) -``` - **Аргументы** - `arg` — аргумент. @@ -31,12 +23,6 @@ argMax(tuple(arg, val)) Тип: соответствует типу `arg`. -Если передан кортеж: - -- кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`. - -Тип: [Tuple](../../../sql-reference/data-types/tuple.md). 
- **Пример** Исходная таблица: @@ -52,14 +38,14 @@ argMax(tuple(arg, val)) Запрос: ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary; +SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary; ``` Результат: ``` text -┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐ -│ director │ ('director',5000) │ ('director',5000) │ -└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘ +┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 4ee78a73a84..dd923061943 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -6,20 +6,12 @@ toc_priority: 105 Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. -Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). - **Синтаксис** ``` sql argMin(arg, val) ``` -или - -``` sql -argMin(tuple(arg, val)) -``` - **Аргументы** - `arg` — аргумент. @@ -31,12 +23,6 @@ argMin(tuple(arg, val)) Тип: соответствует типу `arg`. -Если передан кортеж: - -- Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`. - -Тип: [Tuple](../../../sql-reference/data-types/tuple.md). - **Пример** Исходная таблица: @@ -52,14 +38,14 @@ argMin(tuple(arg, val)) Запрос: ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; +SELECT argMin(user, salary) FROM salary; ``` Результат: ``` text -┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ -│ worker │ ('worker',1000) │ -└──────────────────────┴─────────────────────────────┘ +┌─argMin(user, salary)─┐ +│ worker │ +└──────────────────────┘ ``` diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index 7b81c577762..8f47bd0902b 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -21,8 +21,6 @@ - [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) - [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) - [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) -- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) -- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) !!! note "Примечание" Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются. 
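The `argMin`/`argMax` examples above select from a pre-existing `salary` table whose contents are not shown in this hunk. A minimal sketch that reproduces the documented results; the schema and the middle row are assumptions:

``` sql
-- Assumed schema and data for the `salary` sample table used in the argMin/argMax examples above.
CREATE TABLE salary (user String, salary UInt32) ENGINE = Memory;
INSERT INTO salary VALUES ('director', 5000), ('manager', 3000), ('worker', 1000);

SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary;
-- ┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┐
-- │ director             │ ('director',5000)                   │
-- └──────────────────────┴─────────────────────────────────────┘

SELECT argMin(user, salary) FROM salary;
-- ┌─argMin(user, salary)─┐
-- │ worker               │
-- └──────────────────────┘
```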
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 06fe4ae327a..647e2c5f5a7 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -53,14 +53,17 @@ LAYOUT(LAYOUT_TYPE(param value)) -- layout settings - [flat](#flat) - [hashed](#dicts-external_dicts_dict_layout-hashed) - [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed) -- [cache](#cache) -- [ssd_cache](#ssd-cache) -- [ssd_complex_key_cache](#complex-key-ssd-cache) -- [direct](#direct) -- [range_hashed](#range-hashed) - [complex_key_hashed](#complex-key-hashed) +- [complex_key_sparse_hashed](#complex-key-sparse-hashed) +- [hashed_array](#dicts-external_dicts_dict_layout-hashed-array) +- [complex_key_hashed_array](#complex-key-hashed-array) +- [range_hashed](#range-hashed) - [complex_key_range_hashed](#complex-key-range-hashed) +- [cache](#cache) - [complex_key_cache](#complex-key-cache) +- [ssd_cache](#ssd-cache) +- [complex_key_ssd_cache](#complex-key-ssd-cache) +- [direct](#direct) - [complex_key_direct](#complex-key-direct) - [ip_trie](#ip-trie) @@ -140,7 +143,7 @@ LAYOUT(SPARSE_HASHED([PREALLOCATE 0])) ### complex_key_hashed {#complex-key-hashed} -Тип размещения предназначен для использования с составными [ключами](external-dicts-dict-structure.md). Аналогичен `hashed`. +Тип размещения предназначен для использования с составными [ключами](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Аналогичен `hashed`. Пример конфигурации: @@ -156,6 +159,63 @@ LAYOUT(SPARSE_HASHED([PREALLOCATE 0])) LAYOUT(COMPLEX_KEY_HASHED()) ``` +### complex_key_sparse_hashed {#complex-key-sparse-hashed} + +Тип размещения предназначен для использования с составными [ключами](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Аналогичен [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed). + +Пример конфигурации: + +``` xml + + + +``` + +или + +``` sql +LAYOUT(COMPLEX_KEY_SPARSE_HASHED()) +``` + +### hashed_array {#dicts-external_dicts_dict_layout-hashed-array} + +Словарь полностью хранится в оперативной памяти. Каждый атрибут хранится в массиве. Ключевой атрибут хранится в виде хеш-таблицы, где его значение является индексом в массиве атрибутов. Словарь может содержать произвольное количество элементов с произвольными идентификаторами. На практике количество ключей может достигать десятков миллионов элементов. + +Поддерживаются все виды источников. При обновлении данные (из файла, из таблицы) считываются целиком. + +Пример конфигурации: + +``` xml + + + + +``` + +или + +``` sql +LAYOUT(HASHED_ARRAY()) +``` + +### complex_key_hashed_array {#complex-key-hashed-array} + +Тип размещения предназначен для использования с составными [ключами](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). Аналогичен [hashed_array](#dicts-external_dicts_dict_layout-hashed-array). + +Пример конфигурации: + +``` xml + + + +``` + +или + +``` sql +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) +``` + ### range_hashed {#range-hashed} Словарь хранится в оперативной памяти в виде хэш-таблицы с упорядоченным массивом диапазонов и соответствующих им значений. @@ -297,9 +357,13 @@ RANGE(MIN StartDate MAX EndDate); При поиске в словаре сначала просматривается кэш. 
На каждый блок данных, все не найденные в кэше или устаревшие ключи запрашиваются у источника с помощью `SELECT attrs... FROM db.table WHERE id IN (k1, k2, ...)`. Затем, полученные данные записываются в кэш. -Для cache-словарей может быть задано время устаревания [lifetime](external-dicts-dict-lifetime.md) данных в кэше. Если от загрузки данных в ячейке прошло больше времени, чем `lifetime`, то значение не используется, и будет запрошено заново при следующей необходимости его использовать. +Если ключи не были найдены в словаре, то для обновления кэша создается задание и добавляется в очередь обновлений. Параметры очереди обновлений можно устанавливать настройками `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates` -Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа cache показывает высокую производительность лишь при достаточно больших hit rate-ах (рекомендуется 99% и выше). Посмотреть средний hit rate можно в таблице `system.dictionaries`. +Для cache-словарей при помощи настройки `allow_read_expired_keys` может быть задано время устаревания [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) данных в кэше. Если с момента загрузки данных в ячейку прошло больше времени, чем `lifetime`, то значение не используется, а ключ устаревает. Ключ будет запрошен заново при следующей необходимости его использовать. + +Это наименее эффективный из всех способов размещения словарей. Скорость работы кэша очень сильно зависит от правильности настройки и сценария использования. Словарь типа `cache` показывает высокую производительность лишь при достаточно большой частоте успешных обращений (рекомендуется 99% и выше). Посмотреть среднюю частоту успешных обращений (`hit rate`) можно в таблице [system.dictionaries](../../../operations/system-tables/dictionaries.md). + +Если параметр `allow_read_expired_keys` выставлен в 1 (0 по умолчанию), то словарь поддерживает асинхронные обновления. Если клиент запрашивает ключи, которые находятся в кэше, но при этом некоторые из них устарели, то словарь вернет устаревшие ключи клиенту и запросит их асинхронно у источника. Чтобы увеличить производительность кэша, используйте подзапрос с `LIMIT`, а снаружи вызывайте функцию со словарём. @@ -312,6 +376,16 @@ RANGE(MIN StartDate MAX EndDate); 1000000000 + + 0 + + 100000 + + 10 + + 60000 + + 4 ``` @@ -338,7 +412,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ### ssd_cache {#ssd-cache} -Похож на `cache`, но хранит данные на SSD и индекс в оперативной памяти. +Похож на `cache`, но хранит данные на SSD, а индекс в оперативной памяти. Все параметры, относящиеся к очереди обновлений, могут также быть применены к SSD-кэш словарям. 
``` xml @@ -352,7 +426,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) 1048576 - /var/lib/clickhouse/clickhouse_dictionaries/test_dict + /var/lib/clickhouse/user_files/test_dict ``` @@ -361,7 +435,7 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000)) ``` sql LAYOUT(SSD_CACHE(BLOCK_SIZE 4096 FILE_SIZE 16777216 READ_BUFFER_SIZE 1048576 - PATH ./user_files/test_dict)) + PATH '/var/lib/clickhouse/user_files/test_dict')) ``` ### complex_key_ssd_cache {#complex-key-ssd-cache} diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 925aac56968..7d2cea0af4e 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -2375,3 +2375,39 @@ shardCount() **См. также** - Пример использования функции [shardNum()](#shard-num) также содержит вызов `shardCount()`. + +## getOSKernelVersion {#getoskernelversion} + +Возвращает строку с текущей версией ядра ОС. + +**Синтаксис** + +``` sql +getOSKernelVersion() +``` + +**Аргументы** + +- Нет. + +**Возвращаемое значение** + +- Текущая версия ядра ОС. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT getOSKernelVersion(); +``` + +Результат: + +``` text +┌─getOSKernelVersion()────┐ +│ Linux 4.15.0-55-generic │ +└─────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/splitting-merging-functions.md b/docs/ru/sql-reference/functions/splitting-merging-functions.md index efe74dba043..99eda621e72 100644 --- a/docs/ru/sql-reference/functions/splitting-merging-functions.md +++ b/docs/ru/sql-reference/functions/splitting-merging-functions.md @@ -232,3 +232,41 @@ SELECT alphaTokens('abca1abc'); │ ['abca','abc'] │ └─────────────────────────┘ ``` + +## ngrams {#ngrams} + +Выделяет из UTF-8 строки отрезки (n-граммы) размером `ngramsize` символов. + +**Синтаксис** + +``` sql +ngrams(string, ngramsize) +``` + +**Аргументы** + +- `string` — строка. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `ngramsize` — размер n-грамм. [UInt](../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +- Массив с n-граммами. + +Тип: [Array](../../sql-reference/data-types/array.md)([FixedString](../../sql-reference/data-types/fixedstring.md)). + +**Пример** + +Запрос: + +``` sql +SELECT ngrams('ClickHouse', 3); +``` + +Результат: + +``` text +┌─ngrams('ClickHouse', 3)───────────────────────────┐ +│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │ +└───────────────────────────────────────────────────┘ +``` + diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index 22efaa71bfc..f6248d97ba9 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -15,6 +15,7 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] + [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] 
[EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] ``` diff --git a/docs/ru/sql-reference/statements/select/from.md b/docs/ru/sql-reference/statements/select/from.md index b62b2fd51d4..0711d602cd1 100644 --- a/docs/ru/sql-reference/statements/select/from.md +++ b/docs/ru/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM Если в запросе используется модификатор `FINAL`, то ClickHouse полностью мёржит данные перед выдачей результата, таким образом выполняя все преобразования данных, которые производятся движком таблиц при мёржах. -Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков (кроме `GraphiteMergeTree`). Также поддерживается для: +Он применим при выборе данных из таблиц, использующих [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)- семейство движков. Также поддерживается для: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты исполнения `MergeTree` движков. - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md), и [MaterializedView](../../../engines/table-engines/special/materializedview.md), которые работают поверх других движков, если они созданы для таблиц с движками семейства `MergeTree`. diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index e37533fb36b..bd7a197f926 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -29,7 +29,7 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 在终端命令行输入下列指令: - git clone --recursive git@guthub.com:your_github_username/ClickHouse.git + git clone git@guthub.com:your_github_username/ClickHouse.git cd ClickHouse 请注意,您需要将*your_github_username* 替换成实际使用的账户名! @@ -71,7 +71,7 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 在git中使用子模块可能会很痛苦。 接下来的命令将有助于管理它: - # ! each command accepts --recursive + # ! each command accepts # Update remote URLs for submodules. Barely rare case git submodule sync # Add new submodules @@ -84,16 +84,16 @@ ClickHose支持Linux,FreeBSD 及 Mac OS X 系统。 接下来的命令将帮助您将所有子模块重置为初始状态(!华林! -里面的任何chenges将被删除): # Synchronizes submodules' remote URL with .gitmodules - git submodule sync --recursive + git submodule sync # Update the registered submodules with initialize not yet initialized - git submodule update --init --recursive + git submodule update --init # Reset all changes done after HEAD git submodule foreach git reset --hard # Clean files from .gitignore git submodule foreach git clean -xfd # Repeat last 4 commands for all submodule - git submodule foreach git submodule sync --recursive - git submodule foreach git submodule update --init --recursive + git submodule foreach git submodule sync + git submodule foreach git submodule update --init git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd diff --git a/docs/zh/operations/tips.md b/docs/zh/operations/tips.md index a4755086941..63a74abd7d8 100644 --- a/docs/zh/operations/tips.md +++ b/docs/zh/operations/tips.md @@ -71,7 +71,7 @@ echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size ## 文件系统 {#file-system} -Ext4是最可靠的选择。 设置挂载选项 `noatime, nobarrier`. +Ext4是最可靠的选择。 设置挂载选项 `noatime`. 
XFS也是合适的,但它还没有经过ClickHouse的全面测试。 大多数其他文件系统也应该可以正常工作。 具有延迟分配的文件系统工作得更好。 diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md index 9d90590b2f1..ed3d2d07a58 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmax.md @@ -6,20 +6,12 @@ toc_priority: 106 计算 `val` 最大值对应的 `arg` 值。 如果 `val` 最大值存在几个不同的 `arg` 值,输出遇到的第一个值。 -这个函数的Tuple版本将返回 `val` 最大值对应的元组。本函数适合和 `SimpleAggregateFunction` 搭配使用。 - **语法** ``` sql argMax(arg, val) ``` -或 - -``` sql -argMax(tuple(arg, val)) -``` - **参数** - `arg` — Argument. @@ -31,12 +23,6 @@ argMax(tuple(arg, val)) 类型: 匹配 `arg` 类型。 -对于输入中的元组: - -- 元组 `(arg, val)`, 其中 `val` 最大值,`arg` 是对应的值。 - -类型: [元组](../../../sql-reference/data-types/tuple.md)。 - **示例** 输入表: @@ -52,13 +38,13 @@ argMax(tuple(arg, val)) 查询: ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary; +SELECT argMax(user, salary), argMax(tuple(user, salary), salary) FROM salary; ``` 结果: ``` text -┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐ -│ director │ ('director',5000) │ ('director',5000) │ -└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘ +┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md index 0dd4625ac0d..8d987300ad4 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/argmin.md @@ -8,8 +8,6 @@ toc_priority: 105 计算 `val` 最小值对应的 `arg` 值。 如果 `val` 最小值存在几个不同的 `arg` 值,输出遇到的第一个(`arg`)值。 -这个函数的Tuple版本将返回 `val` 最小值对应的tuple。本函数适合和`SimpleAggregateFunction`搭配使用。 - **示例:** 输入表: @@ -25,13 +23,13 @@ toc_priority: 105 查询: ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; +SELECT argMin(user, salary), argMin(tuple(user, salary), salary) FROM salary; ``` 结果: ``` text -┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ -│ worker │ ('worker',1000) │ -└──────────────────────┴─────────────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary), salary)─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index fae25c0c3c1..c47e74e5e1f 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -20,7 +20,7 @@ toc_title: FROM 当 `FINAL` 被指定,ClickHouse会在返回结果之前完全合并数据,从而执行给定表引擎合并期间发生的所有数据转换。 -它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族(除了 `GraphiteMergeTree`). 还支持: +它适用于从使用 [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)-引擎族. 
还支持: - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index caa0a87bde2..1c276a83768 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -432,7 +432,7 @@ private: Progress progress; executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); - BlockStreamProfileInfo info; + ProfileInfo info; while (Block block = executor.read()) info.update(block); diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 11459ff4a4a..98d3044f007 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -25,9 +25,6 @@ #endif #include #include -#include -#include -#include #include #include #include "Common/MemoryTracker.h" @@ -35,13 +32,11 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -51,9 +46,6 @@ #include #include #include -#include -#include -#include #include @@ -86,7 +78,6 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int TOO_DEEP_RECURSION; extern const int NETWORK_ERROR; - extern const int UNRECOGNIZED_ARGUMENTS; extern const int AUTHENTICATION_FAILED; } @@ -993,7 +984,7 @@ void Client::printHelpMessage(const OptionsDescription & options_description) } -void Client::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() @@ -1050,14 +1041,6 @@ void Client::addAndCheckOptions(OptionsDescription & options_description, po::va ( "types", po::value(), "types" ); - - cmd_settings.addProgramOptions(options_description.main_description.value()); - /// Parse main commandline options. - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[1]); - po::store(parsed, options); } @@ -1235,16 +1218,16 @@ int mainEntryClickHouseClient(int argc, char ** argv) client.init(argc, argv); return client.run(); } - catch (const boost::program_options::error & e) - { - std::cerr << "Bad arguments: " << e.what() << std::endl; - return 1; - } catch (const DB::Exception & e) { std::cerr << DB::getExceptionMessage(e, false) << std::endl; return 1; } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; diff --git a/programs/client/Client.h b/programs/client/Client.h index 43f6deae0b5..2def74ef3fc 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -24,7 +24,7 @@ protected: String getName() const override { return "client"; } void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) override; void processConfig() override; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index e7aeea8cbad..5b21a7c2aef 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index c5e702cd1dc..6fc69361c90 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -57,14 +57,6 @@ std::shared_ptr createASTStorageDistributed( } -BlockInputStreamPtr squashStreamIntoOneBlock(const BlockInputStreamPtr & stream) -{ - return std::make_shared( - stream, - std::numeric_limits::max(), - std::numeric_limits::max()); -} - Block getBlockWithAllStreamData(QueryPipeline pipeline) { QueryPipelineBuilder builder; @@ -82,7 +74,6 @@ Block getBlockWithAllStreamData(QueryPipeline pipeline) return block; } - bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) { const auto & storage = storage_ast->as(); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 9e648060592..eb2622c6b26 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -49,9 +49,7 @@ #include #include #include -#include -#include -#include +#include #include #include #include diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 984543a6c6b..4b0e8ad1ca1 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -25,6 +25,8 @@ #include #include #include +#include +#include #pragma GCC diagnostic ignored "-Wunused-function" @@ -114,6 +116,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) registerAggregateFunctions(); registerTableFunctions(); registerStorages(); + registerFormats(); std::unordered_set additional_names; @@ -130,6 +133,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) return FunctionFactory::instance().has(what) || AggregateFunctionFactory::instance().isAggregateFunctionName(what) || TableFunctionFactory::instance().isTableFunctionName(what) + || FormatFactory::instance().isOutputFormat(what) + || FormatFactory::instance().isInputFormat(what) || additional_names.count(what); }; diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index e001d0ceb53..606af7ecd0d 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -66,6 +66,7 @@ namespace ErrorCodes extern const int CANNOT_OPEN_FILE; extern const int SYSTEM_ERROR; extern const int NOT_ENOUGH_SPACE; + extern const int NOT_IMPLEMENTED; extern const int CANNOT_KILL; } @@ -75,8 +76,18 @@ namespace ErrorCodes #define HILITE "\033[1m" #define END_HILITE "\033[0m" -static constexpr auto CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge"; 
-static constexpr auto CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge"; +#if defined(OS_DARWIN) +/// Until createUser() and createGroup() are implemented, only sudo-less installations are supported/default for macOS. +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_USER = ""; +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_GROUP = ""; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_USER = ""; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_GROUP = ""; +#else +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_USER = "clickhouse"; +static constexpr auto DEFAULT_CLICKHOUSE_SERVER_GROUP = "clickhouse"; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge"; +static constexpr auto DEFAULT_CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge"; +#endif using namespace DB; namespace po = boost::program_options; @@ -127,36 +138,83 @@ static bool filesEqual(std::string path1, std::string path2) && 0 == memcmp(in1.buffer().begin(), in2.buffer().begin(), in1.buffer().size()); } +static void changeOwnership(const String & file_name, const String & user_name, const String & group_name = {}, bool recursive = true) +{ + if (!user_name.empty() || !group_name.empty()) + { + std::string command = fmt::format("chown {} {}:{} '{}'", (recursive ? "-R" : ""), user_name, group_name, file_name); + fmt::print(" {}\n", command); + executeScript(command); + } +} + +static void createGroup(const String & group_name) +{ + if (!group_name.empty()) + { +#if defined(OS_DARWIN) + + // TODO: implement. + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a group in macOS"); +#else + std::string command = fmt::format("groupadd -r {}", group_name); + fmt::print(" {}\n", command); + executeScript(command); +#endif + } +} + +static void createUser(const String & user_name, [[maybe_unused]] const String & group_name) +{ + if (!user_name.empty()) + { +#if defined(OS_DARWIN) + + // TODO: implement. + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unable to create a user in macOS"); +#else + std::string command = group_name.empty() + ? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name) + : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name); + fmt::print(" {}\n", command); + executeScript(command); +#endif + } +} + int mainEntryClickHouseInstall(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("prefix", po::value()->default_value(""), "prefix for all paths") - ("binary-path", po::value()->default_value("/usr/bin"), "where to install binaries") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "where to install configs") - ("log-path", po::value()->default_value("/var/log/clickhouse-server"), "where to create log directory") - ("data-path", po::value()->default_value("/var/lib/clickhouse"), "directory for data") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user to create") - ("group", po::value()->default_value("clickhouse"), "clickhouse group to create") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? 
"" : "sudo ") - << argv[0] - << " install [options]\n"; - std::cout << desc << '\n'; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "where to install configs") + ("log-path", po::value()->default_value("var/log/clickhouse-server"), "where to create log directory") + ("data-path", po::value()->default_value("var/lib/clickhouse"), "directory for data") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user to create") + ("group", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_GROUP), "clickhouse group to create") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " install [options]\n"; + std::cout << desc << '\n'; + return 1; + } + /// We need to copy binary to the binary directory. /// The binary is currently run. We need to obtain its path from procfs (on Linux). @@ -171,6 +229,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (res != 0) Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary"); + if (path.back() == '\0') + path.pop_back(); + fs::path binary_self_path(path); #else fs::path binary_self_path = "/proc/self/exe"; @@ -186,8 +247,8 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// TODO An option to link instead of copy - useful for developers. - fs::path prefix = fs::path(options["prefix"].as()); - fs::path bin_dir = prefix / fs::path(options["binary-path"].as()); + fs::path prefix = options["prefix"].as(); + fs::path bin_dir = prefix / options["binary-path"].as(); fs::path main_bin_path = bin_dir / "clickhouse"; fs::path main_bin_tmp_path = bin_dir / "clickhouse.new"; @@ -225,6 +286,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { + if (!fs::exists(bin_dir)) + { + fmt::print("Creating binary directory {}.\n", bin_dir.string()); + fs::create_directories(bin_dir); + } + size_t available_space = fs::space(bin_dir).available; if (available_space < binary_size) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for clickhouse binary in {}, required {}, available {}.", @@ -326,34 +393,18 @@ int mainEntryClickHouseInstall(int argc, char ** argv) std::string user = options["user"].as(); std::string group = options["group"].as(); - auto create_group = [](const String & group_name) - { - std::string command = fmt::format("groupadd -r {}", group_name); - fmt::print(" {}\n", command); - executeScript(command); - }; - if (!group.empty()) { fmt::print("Creating clickhouse group if it does not exist.\n"); - create_group(group); + createGroup(group); } else - fmt::print("Will not create clickhouse group"); - - auto create_user = [](const String & user_name, const String & group_name) - { - std::string command = group_name.empty() - ? 
fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name) - : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name); - fmt::print(" {}\n", command); - executeScript(command); - }; + fmt::print("Will not create a dedicated clickhouse group.\n"); if (!user.empty()) { fmt::print("Creating clickhouse user if it does not exist.\n"); - create_user(user, group); + createUser(user, group); if (group.empty()) group = user; @@ -361,6 +412,11 @@ int mainEntryClickHouseInstall(int argc, char ** argv) /// Setting ulimits. try { +#if defined(OS_DARWIN) + + /// TODO Set ulimits on macOS. + +#else fs::path ulimits_dir = "/etc/security/limits.d"; fs::path ulimits_file = ulimits_dir / fmt::format("{}.conf", user); fmt::print("Will set ulimits for {} user in {}.\n", user, ulimits_file.string()); @@ -374,16 +430,15 @@ int mainEntryClickHouseInstall(int argc, char ** argv) out.write(ulimits_content.data(), ulimits_content.size()); out.sync(); out.finalize(); +#endif } catch (...) { std::cerr << "Cannot set ulimits: " << getCurrentExceptionMessage(false) << "\n"; } - - /// TODO Set ulimits on Mac OS X } else - fmt::print("Will not create clickhouse user.\n"); + fmt::print("Will not create a dedicated clickhouse user.\n"); /// Creating configuration files and directories. @@ -400,9 +455,9 @@ int mainEntryClickHouseInstall(int argc, char ** argv) fs::path config_d = config_dir / "config.d"; fs::path users_d = config_dir / "users.d"; - std::string log_path = prefix / options["log-path"].as(); - std::string data_path = prefix / options["data-path"].as(); - std::string pid_path = prefix / options["pid-path"].as(); + fs::path log_path = prefix / options["log-path"].as(); + fs::path data_path = prefix / options["data-path"].as(); + fs::path pid_path = prefix / options["pid-path"].as(); bool has_password_for_default_user = false; @@ -427,10 +482,78 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } else { - WriteBufferFromFile out(main_config_file.string()); - out.write(main_config_content.data(), main_config_content.size()); - out.sync(); - out.finalize(); + { + WriteBufferFromFile out(main_config_file.string()); + out.write(main_config_content.data(), main_config_content.size()); + out.sync(); + out.finalize(); + } + + /// Override the default paths. + + /// Data paths. + { + std::string data_file = config_d / "data-paths.xml"; + WriteBufferFromFile out(data_file); + out << "\n" + " " << data_path.string() << "\n" + " " << (data_path / "tmp").string() << "\n" + " " << (data_path / "user_files").string() << "\n" + " " << (data_path / "format_schemas").string() << "\n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("Data path configuration override is saved to file {}.\n", data_file); + } + + /// Logger. + { + std::string logger_file = config_d / "logger.xml"; + WriteBufferFromFile out(logger_file); + out << "\n" + " \n" + " " << (log_path / "clickhouse-server.log").string() << "\n" + " " << (log_path / "clickhouse-server.err.log").string() << "\n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("Log path configuration override is saved to file {}.\n", logger_file); + } + + /// User directories. 
+ { + std::string user_directories_file = config_d / "user-directories.xml"; + WriteBufferFromFile out(user_directories_file); + out << "\n" + " \n" + " \n" + " " << (data_path / "access").string() << "\n" + " \n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("User directory path configuration override is saved to file {}.\n", user_directories_file); + } + + /// OpenSSL. + { + std::string openssl_file = config_d / "openssl.xml"; + WriteBufferFromFile out(openssl_file); + out << "\n" + " \n" + " \n" + " " << (config_dir / "server.crt").string() << "\n" + " " << (config_dir / "server.key").string() << "\n" + " " << (config_dir / "dhparam.pem").string() << "\n" + " \n" + " \n" + "\n"; + out.sync(); + out.finalize(); + fmt::print("OpenSSL path configuration override is saved to file {}.\n", openssl_file); + } } } else @@ -443,13 +566,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (configuration->has("path")) { data_path = configuration->getString("path"); - fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path); + fmt::print("{} has {} as data path.\n", main_config_file.string(), data_path.string()); } if (configuration->has("logger.log")) { log_path = fs::path(configuration->getString("logger.log")).remove_filename(); - fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path); + fmt::print("{} has {} as log path.\n", main_config_file.string(), log_path.string()); } } @@ -485,82 +608,44 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } } - auto change_ownership = [](const String & file_name, const String & user_name, const String & group_name) - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user_name, group_name, file_name); - fmt::print(" {}\n", command); - executeScript(command); - }; - - /// Chmod and chown configs - change_ownership(config_dir.string(), user, group); - - /// Symlink "preprocessed_configs" is created by the server, so "write" is needed. - fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace); - - /// Subdirectories, so "execute" is needed. - if (fs::exists(config_d)) - fs::permissions(config_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); - if (fs::exists(users_d)) - fs::permissions(users_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); - - /// Readonly. - if (fs::exists(main_config_file)) - fs::permissions(main_config_file, fs::perms::owner_read, fs::perm_options::replace); - if (fs::exists(users_config_file)) - fs::permissions(users_config_file, fs::perms::owner_read, fs::perm_options::replace); - /// Create directories for data and log. 
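(Directory creation for data and logs follows right below.) The .string() calls added to the fmt::print() statements above are a consequence of log_path/data_path/pid_path becoming fs::path: presumably the fmt version vendored at this point has no formatter for std::filesystem::path, so the paths are converted explicitly. A minimal illustration under that assumption.

#include <fmt/core.h>
#include <filesystem>

namespace fs = std::filesystem;

int main()
{
    fs::path data_path = "/var/lib/clickhouse";

    /// fmt::print("{} as data path.\n", data_path);      // does not compile without a fs::path formatter
    fmt::print("{} as data path.\n", data_path.string()); /// explicit conversion, as in the hunks above
}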
if (fs::exists(log_path)) { - fmt::print("Log directory {} already exists.\n", log_path); + fmt::print("Log directory {} already exists.\n", log_path.string()); } else { - fmt::print("Creating log directory {}.\n", log_path); + fmt::print("Creating log directory {}.\n", log_path.string()); fs::create_directories(log_path); } if (fs::exists(data_path)) { - fmt::print("Data directory {} already exists.\n", data_path); + fmt::print("Data directory {} already exists.\n", data_path.string()); } else { - fmt::print("Creating data directory {}.\n", data_path); + fmt::print("Creating data directory {}.\n", data_path.string()); fs::create_directories(data_path); } if (fs::exists(pid_path)) { - fmt::print("Pid directory {} already exists.\n", pid_path); + fmt::print("Pid directory {} already exists.\n", pid_path.string()); } else { - fmt::print("Creating pid directory {}.\n", pid_path); + fmt::print("Creating pid directory {}.\n", pid_path.string()); fs::create_directories(pid_path); } /// Chmod and chown data and log directories - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, log_path); - fmt::print(" {}\n", command); - executeScript(command); - } + changeOwnership(log_path, user, group); + changeOwnership(pid_path, user, group); - { - std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, pid_path); - fmt::print(" {}\n", command); - executeScript(command); - } - - { - /// Not recursive, because there can be a huge number of files and it will be slow. - std::string command = fmt::format("chown {}:{} '{}'", user, group, data_path); - fmt::print(" {}\n", command); - executeScript(command); - } + /// Not recursive, because there can be a huge number of files and it will be slow. + changeOwnership(data_path, user, group, /* recursive= */ false); /// All users are allowed to read pid file (for clickhouse status command). fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace); @@ -576,13 +661,13 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (fs::exists(odbc_bridge_path) || fs::exists(library_bridge_path)) { - create_group(CLICKHOUSE_BRIDGE_GROUP); - create_user(CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + createGroup(DEFAULT_CLICKHOUSE_BRIDGE_GROUP); + createUser(DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); if (fs::exists(odbc_bridge_path)) - change_ownership(odbc_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + changeOwnership(odbc_bridge_path, DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); if (fs::exists(library_bridge_path)) - change_ownership(library_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP); + changeOwnership(library_bridge_path, DEFAULT_CLICKHOUSE_BRIDGE_USER, DEFAULT_CLICKHOUSE_BRIDGE_GROUP); } bool stdin_is_a_tty = isatty(STDIN_FILENO); @@ -701,6 +786,25 @@ int mainEntryClickHouseInstall(int argc, char ** argv) } } + /// Chmod and chown configs + changeOwnership(config_dir, user, group); + + /// Symlink "preprocessed_configs" is created by the server, so "write" is needed. + fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace); + + /// Subdirectories, so "execute" is needed. + if (fs::exists(config_d)) + fs::permissions(config_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); + if (fs::exists(users_d)) + fs::permissions(users_d, fs::perms::owner_read | fs::perms::owner_exec, fs::perm_options::replace); + + /// Readonly. 
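(The read-only permission tweaks announced above continue below.) All ownership changes in the installer now go through changeOwnership(), which treats empty user/group names as "do nothing", so the sudo-less macOS defaults flow through the same code path, and which takes a recursive flag so the data directory, where there can be a huge number of files, is chowned non-recursively. A pared-down sketch of that convention in plain C++ (not the DB helpers themselves):

#include <cstdio>
#include <string>

/// Sketch of the convention used by the install helpers above:
/// empty names mean "skip", and recursion is opt-out for huge directories.
static void changeOwnershipSketch(const std::string & path, const std::string & user,
                                  const std::string & group = {}, bool recursive = true)
{
    if (user.empty() && group.empty())
        return;   /// sudo-less install (e.g. the macOS defaults): nothing to chown

    std::printf("chown %s %s:%s '%s'\n", recursive ? "-R" : "", user.c_str(), group.c_str(), path.c_str());
}

int main()
{
    changeOwnershipSketch("/var/log/clickhouse-server", "clickhouse", "clickhouse");    /// recursive
    changeOwnershipSketch("/var/lib/clickhouse", "clickhouse", "clickhouse", false);    /// data dir: not recursive
    changeOwnershipSketch("/etc/clickhouse-server", "", "");                            /// skipped entirely
}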
+ if (fs::exists(main_config_file)) + fs::permissions(main_config_file, fs::perms::owner_read, fs::perm_options::replace); + if (fs::exists(users_config_file)) + fs::permissions(users_config_file, fs::perms::owner_read, fs::perm_options::replace); + + std::string maybe_password; if (has_password_for_default_user) maybe_password = " --password"; @@ -766,11 +870,7 @@ namespace /// All users are allowed to read pid file (for clickhouse status command). fs::permissions(pid_path, fs::perms::owner_all | fs::perms::group_read | fs::perms::others_read, fs::perm_options::replace); - { - std::string command = fmt::format("chown --recursive {} '{}'", user, pid_path.string()); - fmt::print(" {}\n", command); - executeScript(command); - } + changeOwnership(pid_path, user); } std::string command = fmt::format("{} --config-file {} --pid-file {} --daemon", @@ -974,34 +1074,36 @@ namespace int mainEntryClickHouseStart(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("binary-path", po::value()->default_value("/usr/bin"), "directory with binary") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "directory with configs") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " start\n"; - return 1; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? 
"" : "sudo ") + << argv[0] + << " start\n"; + return 1; + } + std::string user = options["user"].as(); - fs::path executable = fs::path(options["binary-path"].as()) / "clickhouse-server"; - fs::path config = fs::path(options["config-path"].as()) / "config.xml"; - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + fs::path prefix = options["prefix"].as(); + fs::path executable = prefix / options["binary-path"].as() / "clickhouse-server"; + fs::path config = prefix / options["config-path"].as() / "config.xml"; + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; return start(user, executable, config, pid_file); } @@ -1015,28 +1117,30 @@ int mainEntryClickHouseStart(int argc, char ** argv) int mainEntryClickHouseStop(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("force", po::bool_switch(), "Stop with KILL signal instead of TERM") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " stop\n"; - return 1; - } - try { - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("force", po::bool_switch(), "Stop with KILL signal instead of TERM") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " stop\n"; + return 1; + } + + fs::path prefix = options["prefix"].as(); + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; return stop(pid_file, options["force"].as()); } @@ -1050,72 +1154,79 @@ int mainEntryClickHouseStop(int argc, char ** argv) int mainEntryClickHouseStatus(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " status\n"; - return 1; - } - try { - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " status\n"; + return 1; + } + + fs::path prefix = options["prefix"].as(); + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; + isRunning(pid_file); - return 0; } catch (...) 
{ std::cerr << getCurrentExceptionMessage(false) << '\n'; return getCurrentExceptionCode(); } + + return 0; } int mainEntryClickHouseRestart(int argc, char ** argv) { - po::options_description desc; - desc.add_options() - ("help,h", "produce help message") - ("binary-path", po::value()->default_value("/usr/bin"), "directory with binary") - ("config-path", po::value()->default_value("/etc/clickhouse-server"), "directory with configs") - ("pid-path", po::value()->default_value("/var/run/clickhouse-server"), "directory for pid file") - ("user", po::value()->default_value("clickhouse"), "clickhouse user") - ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") - ; - - po::variables_map options; - po::store(po::parse_command_line(argc, argv, desc), options); - - if (options.count("help")) - { - std::cout << "Usage: " - << (getuid() == 0 ? "" : "sudo ") - << argv[0] - << " restart\n"; - return 1; - } - try { + po::options_description desc; + desc.add_options() + ("help,h", "produce help message") + ("prefix", po::value()->default_value("/"), "prefix for all paths") + ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") + ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") + ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") + ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") + ("force", po::value()->default_value(false), "Stop with KILL signal instead of TERM") + ; + + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); + + if (options.count("help")) + { + std::cout << "Usage: " + << (getuid() == 0 ? "" : "sudo ") + << argv[0] + << " restart\n"; + return 1; + } + std::string user = options["user"].as(); - fs::path executable = fs::path(options["binary-path"].as()) / "clickhouse-server"; - fs::path config = fs::path(options["config-path"].as()) / "config.xml"; - fs::path pid_file = fs::path(options["pid-path"].as()) / "clickhouse-server.pid"; + fs::path prefix = options["prefix"].as(); + fs::path executable = prefix / options["binary-path"].as() / "clickhouse-server"; + fs::path config = prefix / options["config-path"].as() / "config.xml"; + fs::path pid_file = prefix / options["pid-path"].as() / "clickhouse-server.pid"; if (int res = stop(pid_file, options["force"].as())) return res; + return start(user, executable, config, pid_file); } catch (...) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 28bbb95e01d..da3c42a3213 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -358,8 +358,8 @@ int Keeper::main(const std::vector & /*args*/) auto servers = std::make_shared>(); - /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. - global_context->initializeKeeperDispatcher(); + /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
+ global_context->initializeKeeperDispatcher(/* start_async = */false); for (const auto & listen_host : listen_hosts) { /// TCP Keeper diff --git a/programs/library-bridge/Handlers.cpp b/programs/library-bridge/Handlers.cpp index 2f6dca5ee65..bf9ace679ba 100644 --- a/programs/library-bridge/Handlers.cpp +++ b/programs/library-bridge/Handlers.cpp @@ -1,7 +1,6 @@ #include "Handlers.h" #include "SharedLibraryHandlerFactory.h" -#include #include #include #include @@ -10,11 +9,13 @@ #include #include #include -#include #include -#include +#include +#include #include +#include #include +#include #include #include @@ -189,8 +190,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe ReadBufferFromString read_block_buf(params.get("null_values")); auto format = getContext()->getInputFormat(FORMAT, read_block_buf, *sample_block, DEFAULT_BLOCK_SIZE); - auto reader = std::make_shared(format); - auto sample_block_with_nulls = reader->read(); + QueryPipeline pipeline(Pipe(std::move(format))); + PullingPipelineExecutor executor(pipeline); + Block sample_block_with_nulls; + executor.pull(sample_block_with_nulls); LOG_DEBUG(log, "Dictionary sample block with null values: {}", sample_block_with_nulls.dumpStructure()); @@ -281,8 +284,10 @@ void LibraryRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServe auto & read_buf = request.getStream(); auto format = getContext()->getInputFormat(FORMAT, read_buf, *requested_sample_block, DEFAULT_BLOCK_SIZE); - auto reader = std::make_shared(format); - auto block = reader->read(); + QueryPipeline pipeline(std::move(format)); + PullingPipelineExecutor executor(pipeline); + Block block; + executor.pull(block); auto library_handler = SharedLibraryHandlerFactory::instance().get(dictionary_id); if (!library_handler) diff --git a/programs/library-bridge/SharedLibraryHandler.h b/programs/library-bridge/SharedLibraryHandler.h index f9d2fe43cb2..de1d098dc8d 100644 --- a/programs/library-bridge/SharedLibraryHandler.h +++ b/programs/library-bridge/SharedLibraryHandler.h @@ -2,7 +2,6 @@ #include #include -#include #include "LibraryUtils.h" diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 30082caaac1..4d8977c9e62 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1,8 +1,6 @@ #include "LocalServer.h" #include -#include -#include #include #include #include @@ -10,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -20,19 +17,13 @@ #include #include #include -#include -#include #include -#include -#include #include #include #include #include #include -#include #include -#include #include #include #include @@ -43,9 +34,7 @@ #include #include #include -#include #include -#include #include #include @@ -128,10 +117,9 @@ bool LocalServer::executeMultiQuery(const String & all_queries_text) } case MultiQueryProcessingStage::PARSING_EXCEPTION: { - this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end); - this_query_begin = this_query_end; /// It's expected syntax error, skip the line - current_exception.reset(); - continue; + if (current_exception) + current_exception->rethrow(); + return true; } case MultiQueryProcessingStage::EXECUTE_QUERY: { @@ -514,19 +502,16 @@ void LocalServer::processConfig() format = config().getString("output-format", config().getString("format", is_interactive ? 
"PrettyCompact" : "TSV")); insert_format = "Values"; + /// Setting value from cmd arg overrides one from config if (global_context->getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; else insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); - /// Skip networking - /// Sets external authenticators config (LDAP, Kerberos). global_context->setExternalAuthenticatorsConfig(config()); - global_context->initializeBackgroundExecutors(); - setupUsers(); /// Limit on total number of concurrently executing queries. @@ -662,7 +647,7 @@ void LocalServer::printHelpMessage(const OptionsDescription & options_descriptio } -void LocalServer::addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +void LocalServer::addOptions(OptionsDescription & options_description) { options_description.main_description->add_options() ("database,d", po::value(), "database") @@ -680,11 +665,8 @@ void LocalServer::addAndCheckOptions(OptionsDescription & options_description, p ("logger.level", po::value(), "Log level") ("no-system-tables", "do not attach system tables (better startup time)") + ("path", po::value(), "Storage path") ; - - cmd_settings.addProgramOptions(options_description.main_description.value()); - po::parsed_options parsed = po::command_line_parser(arguments).options(options_description.main_description.value()).run(); - po::store(parsed, options); } @@ -739,6 +721,17 @@ int mainEntryClickHouseLocal(int argc, char ** argv) app.init(argc, argv); return app.run(); } + catch (const DB::Exception & e) + { + std::cerr << DB::getExceptionMessage(e, false) << std::endl; + auto code = DB::getCurrentExceptionCode(); + return code ? code : 1; + } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } catch (...) 
{ std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index e14e18adced..ce0df06c86a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -40,7 +40,7 @@ protected: String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; - void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) override; + void addOptions(OptionsDescription & options_description) override; void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector &) override; void processConfig() override; diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index f18281a9fdf..caccc726923 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 51abe207095..82d1bd61c24 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -4,7 +4,6 @@ #include "ODBCBlockInputStream.h" #include "ODBCBlockOutputStream.h" #include "getIdentifierQuote.h" -#include #include #include #include @@ -15,9 +14,9 @@ #include #include #include -#include -#include +#include #include +#include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 0363e7f1dc3..d850ca45e26 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include #include #include @@ -844,7 +844,7 @@ if (ThreadFuzzer::instance().isEffective()) // FIXME logging-related things need synchronization -- see the 'Logger * log' saved // in a lot of places. For now, disable updating log configuration without server restart. //setTextLog(global_context->getTextLog()); - //buildLoggers(*config, logger()); + updateLevels(*config, logger()); global_context->setClustersConfig(config); global_context->setMacros(std::make_unique(*config, "macros", log)); global_context->setExternalAuthenticatorsConfig(*config); @@ -919,7 +919,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. - global_context->initializeBackgroundExecutors(); + global_context->initializeBackgroundExecutorsIfNeeded(); if (settings.async_insert_threads) global_context->setAsynchronousInsertQueue(std::make_shared( @@ -997,8 +997,19 @@ if (ThreadFuzzer::instance().isEffective()) if (config().has("keeper_server")) { #if USE_NURAFT - /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. - global_context->initializeKeeperDispatcher(); + //// If we don't have configured connection probably someone trying to use clickhouse-server instead + //// of clickhouse-keeper, so start synchronously. + bool can_initialize_keeper_async = false; + + if (has_zookeeper) /// We have configured connection to some zookeeper cluster + { + /// If we cannot connect to some other node from our cluster then we have to wait our Keeper start + /// synchronously. 
+ can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster(); + } + /// Initialize keeper RAFT. + global_context->initializeKeeperDispatcher(can_initialize_keeper_async); + for (const auto & listen_host : listen_hosts) { /// TCP Keeper diff --git a/programs/server/play.html b/programs/server/play.html index ac60ecfb624..728d2935558 100644 --- a/programs/server/play.html +++ b/programs/server/play.html @@ -338,7 +338,7 @@
- +
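The Server.cpp hunk above replaces the unconditional Keeper initialization with a decision: when a ZooKeeper/Keeper connection is configured and at least one other node of our Keeper cluster answers, RAFT may finish initializing asynchronously; otherwise startup blocks until the local Keeper is ready. A condensed restatement of that control flow, with the names taken from the diff:

/// Condensed restatement of the startup logic above (no new behaviour).
bool can_initialize_keeper_async = false;

if (has_zookeeper)
{
    /// Another node of our Keeper cluster is reachable, so RAFT can finish in the background.
    can_initialize_keeper_async = global_context->tryCheckClientConnectionToMyKeeperCluster();
}

/// Passing false blocks until Keeper is ready; standalone clickhouse-keeper
/// (see the Keeper.cpp hunk earlier) always passes /* start_async = */ false.
global_context->initializeKeeperDispatcher(can_initialize_keeper_async);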
diff --git a/src/Bridge/LibraryBridgeHelper.cpp b/src/Bridge/LibraryBridgeHelper.cpp index 319c6c344d7..e5c6c09ba62 100644 --- a/src/Bridge/LibraryBridgeHelper.cpp +++ b/src/Bridge/LibraryBridgeHelper.cpp @@ -1,8 +1,8 @@ #include "LibraryBridgeHelper.h" -#include +#include #include -#include +#include #include #include #include diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cac5b70f489..5b360f9bb63 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -49,7 +49,7 @@ add_subdirectory (Backups) add_subdirectory (Columns) add_subdirectory (Common) add_subdirectory (Core) -add_subdirectory (DataStreams) +add_subdirectory (QueryPipeline) add_subdirectory (DataTypes) add_subdirectory (Dictionaries) add_subdirectory (Disks) @@ -185,7 +185,7 @@ add_object_library(clickhouse_backups Backups) add_object_library(clickhouse_core Core) add_object_library(clickhouse_core_mysql Core/MySQL) add_object_library(clickhouse_compression Compression) -add_object_library(clickhouse_datastreams DataStreams) +add_object_library(clickhouse_querypipeline QueryPipeline) add_object_library(clickhouse_datatypes DataTypes) add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations) add_object_library(clickhouse_databases Databases) @@ -214,6 +214,7 @@ add_object_library(clickhouse_processors_transforms Processors/Transforms) add_object_library(clickhouse_processors_sources Processors/Sources) add_object_library(clickhouse_processors_sinks Processors/Sinks) add_object_library(clickhouse_processors_merges Processors/Merges) +add_object_library(clickhouse_processors_ttl Processors/TTL) add_object_library(clickhouse_processors_merges_algorithms Processors/Merges/Algorithms) add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) @@ -284,6 +285,13 @@ target_link_libraries (clickhouse_common_io dragonbox_to_chars ) +# Use X86 AVX2/AVX512 instructions to accelerate filter operations +set_source_files_properties( + Columns/ColumnFixedString.cpp + Columns/ColumnsCommon.cpp + Columns/ColumnVector.cpp + PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}") + if(RE2_LIBRARY) target_link_libraries(clickhouse_common_io PUBLIC ${RE2_LIBRARY}) endif() diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 041a72a6273..a7c8fdb4641 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -45,14 +45,13 @@ #include #include #include -#include +#include #include #include #include #include #include -#include -#include +#include namespace fs = std::filesystem; @@ -72,6 +71,7 @@ namespace ErrorCodes extern const int UNEXPECTED_PACKET_FROM_SERVER; extern const int INVALID_USAGE_OF_INPUT; extern const int CANNOT_SET_SIGNAL_HANDLER; + extern const int UNRECOGNIZED_ARGUMENTS; } } @@ -267,7 +267,7 @@ void ClientBase::onLogData(Block & block) { initLogsOutputStream(); progress_indication.clearProgressOutput(); - logs_out_stream->write(block); + logs_out_stream->writeLogs(block); logs_out_stream->flush(); } @@ -294,7 +294,7 @@ void ClientBase::onReceiveExceptionFromServer(std::unique_ptr && e) } -void ClientBase::onProfileInfo(const BlockStreamProfileInfo & profile_info) +void ClientBase::onProfileInfo(const ProfileInfo & profile_info) { if (profile_info.hasAppliedLimit() && output_format) output_format->setRowsBeforeLimit(profile_info.getRowsBeforeLimit()); @@ -517,6 +517,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) const size_t poll_interval = 
std::max(min_poll_interval, std::min(receive_timeout.totalMicroseconds(), default_poll_interval)); + bool break_on_timeout = connection->getConnectionType() != IServerConnection::Type::LOCAL; while (true) { Stopwatch receive_watch(CLOCK_MONOTONIC_COARSE); @@ -547,7 +548,7 @@ void ClientBase::receiveResult(ASTPtr parsed_query) else { double elapsed = receive_watch.elapsedSeconds(); - if (elapsed > receive_timeout.totalSeconds()) + if (break_on_timeout && elapsed > receive_timeout.totalSeconds()) { std::cout << "Timeout exceeded while receiving data from server." << " Waited for " << static_cast(elapsed) << " seconds," @@ -670,37 +671,59 @@ void ClientBase::onProfileEvents(Block & block) const auto rows = block.rows(); if (rows == 0) return; - const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); - const auto & names = typeid_cast(*block.getByName("name").column); - const auto & host_names = typeid_cast(*block.getByName("host_name").column); - const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); - const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); - const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); - - HostToThreadTimesMap thread_times; - for (size_t i = 0; i < rows; ++i) + if (progress_indication.print_hardware_utilization) { - auto thread_id = array_thread_id[i]; - auto host_name = host_names.getDataAt(i).toString(); - if (thread_id != 0) - progress_indication.addThreadIdToList(host_name, thread_id); - auto event_name = names.getDataAt(i); - auto value = array_values[i]; - if (event_name == user_time_name) + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & names = typeid_cast(*block.getByName("name").column); + const auto & host_names = typeid_cast(*block.getByName("host_name").column); + const auto & array_values = typeid_cast(*block.getByName("value").column).getData(); + + const auto * user_time_name = ProfileEvents::getName(ProfileEvents::UserTimeMicroseconds); + const auto * system_time_name = ProfileEvents::getName(ProfileEvents::SystemTimeMicroseconds); + + HostToThreadTimesMap thread_times; + for (size_t i = 0; i < rows; ++i) { - thread_times[host_name][thread_id].user_ms = value; + auto thread_id = array_thread_id[i]; + auto host_name = host_names.getDataAt(i).toString(); + if (thread_id != 0) + progress_indication.addThreadIdToList(host_name, thread_id); + auto event_name = names.getDataAt(i); + auto value = array_values[i]; + if (event_name == user_time_name) + { + thread_times[host_name][thread_id].user_ms = value; + } + else if (event_name == system_time_name) + { + thread_times[host_name][thread_id].system_ms = value; + } + else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + { + thread_times[host_name][thread_id].memory_usage = value; + } } - else if (event_name == system_time_name) + progress_indication.updateThreadEventData(thread_times); + } + + if (profile_events.print) + { + if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms) { - thread_times[host_name][thread_id].system_ms = value; + initLogsOutputStream(); + progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(block); + logs_out_stream->flush(); + + profile_events.watch.restart(); + profile_events.last_block = {}; } - else if (event_name == MemoryTracker::USAGE_EVENT_NAME) + else { - thread_times[host_name][thread_id].memory_usage = value; + 
profile_events.last_block = block; } } - progress_indication.updateThreadEventData(thread_times); } @@ -1023,6 +1046,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin processed_rows = 0; written_first_block = false; progress_indication.resetProgress(); + profile_events.watch.restart(); { /// Temporarily apply query settings to context. @@ -1091,6 +1115,15 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } } + /// Always print last block (if it was not printed already) + if (profile_events.last_block) + { + initLogsOutputStream(); + progress_indication.clearProgressOutput(); + logs_out_stream->writeProfileEvents(profile_events.last_block); + logs_out_stream->flush(); + } + if (is_interactive) { std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. "; @@ -1505,6 +1538,26 @@ void ClientBase::readArguments(int argc, char ** argv, Arguments & common_argume } } +void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) +{ + cmd_settings.addProgramOptions(options_description.main_description.value()); + /// Parse main commandline options. + auto parser = po::command_line_parser(arguments).options(options_description.main_description.value()).allow_unregistered(); + po::parsed_options parsed = parser.run(); + + /// Check unrecognized options without positional options. + auto unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional); + if (!unrecognized_options.empty()) + throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); + + /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
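(The positional-options check announced above follows below.) parseAndCheckOptions() switches the client to allow_unregistered() parsing and then rejects leftovers itself: unknown named options raise UNRECOGNIZED_ARGUMENTS, and more than one positional token after " -- " is refused. A self-contained sketch of the same boost::program_options pattern:

#include <boost/program_options.hpp>
#include <iostream>
#include <string>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("options");
    desc.add_options()("query,q", po::value<std::string>(), "query to run");

    auto parsed = po::command_line_parser(argc, argv).options(desc).allow_unregistered().run();

    /// Unknown named options (e.g. --bogus) are collected instead of throwing inside run()...
    auto unrecognized = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::exclude_positional);
    if (!unrecognized.empty())
    {
        std::cerr << "Unrecognized option '" << unrecognized[0] << "'\n";
        return 1;
    }

    /// ...and positional leftovers (tokens after " -- ") are counted separately.
    auto positional = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional);
    if (positional.size() > 1)
    {
        std::cerr << "Positional options are not supported.\n";
        return 1;
    }

    po::variables_map options;
    po::store(parsed, options);
    po::notify(options);
    return 0;
}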
+ unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); + if (unrecognized_options.size() > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + + po::store(parsed, options); +} + void ClientBase::init(int argc, char ** argv) { @@ -1560,9 +1613,13 @@ void ClientBase::init(int argc, char ** argv) ("ignore-error", "do not stop processing in multiquery mode") ("stacktrace", "print stack traces of exceptions") + ("hardware-utilization", "print hardware utilization information in progress bar") + ("print-profile-events", po::value(&profile_events.print)->zero_tokens(), "Printing ProfileEvents packets") + ("profile-events-delay-ms", po::value()->default_value(profile_events.delay_ms), "Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet)") ; - addAndCheckOptions(options_description, options, common_arguments); + addOptions(options_description); + parseAndCheckOptions(options_description, options, common_arguments); po::notify(options); if (options.count("version") || options.count("V")) @@ -1610,6 +1667,10 @@ void ClientBase::init(int argc, char ** argv) config().setBool("vertical", true); if (options.count("stacktrace")) config().setBool("stacktrace", true); + if (options.count("print-profile-events")) + config().setBool("print-profile-events", true); + if (options.count("profile-events-delay-ms")) + config().setInt("profile-events-delay-ms", options["profile-events-delay-ms"].as()); if (options.count("progress")) config().setBool("progress", true); if (options.count("echo")) @@ -1626,8 +1687,12 @@ void ClientBase::init(int argc, char ** argv) config().setBool("verbose", true); if (options.count("log-level")) Poco::Logger::root().setLevel(options["log-level"].as()); + if (options.count("hardware-utilization")) + progress_indication.print_hardware_utilization = true; query_processing_stage = QueryProcessingStage::fromString(options["stage"].as()); + profile_events.print = options.count("print-profile-events"); + profile_events.delay_ms = options["profile-events-delay-ms"].as(); processOptions(options_description, options, external_tables_arguments); argsToConfig(common_arguments, config(), 100); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index d9034534797..2e6cd1f66a6 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -91,7 +92,7 @@ protected: }; virtual void printHelpMessage(const OptionsDescription & options_description) = 0; - virtual void addAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments) = 0; + virtual void addOptions(OptionsDescription & options_description) = 0; virtual void processOptions(const OptionsDescription & options_description, const CommandLineOptions & options, const std::vector & external_tables_arguments) = 0; @@ -112,7 +113,7 @@ private: void onTotals(Block & block, ASTPtr parsed_query); void onExtremes(Block & block, ASTPtr parsed_query); void onReceiveExceptionFromServer(std::unique_ptr && e); - void onProfileInfo(const BlockStreamProfileInfo & profile_info); + void onProfileInfo(const ProfileInfo & profile_info); void onEndOfStream(); void onProfileEvents(Block & block); @@ -132,6 +133,7 @@ private: void resetOutput(); void outputQueryInfo(bool echo_query_); void readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector & 
external_tables_arguments); + void parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments); protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. @@ -217,6 +219,16 @@ protected: QueryFuzzer fuzzer; int query_fuzzer_runs = 0; + struct + { + bool print = false; + /// UINT64_MAX -- print only last + UInt64 delay_ms = 0; + Stopwatch watch; + /// For printing only last (delay_ms == 0). + Block last_block; + } profile_events; + QueryProcessingStage::Enum query_processing_stage; }; diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index a530e48ee35..e1c1481c5b4 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -109,29 +109,29 @@ void highlight(const String & query, std::vector & colors {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, Replxx::Color::INTENSE}, - {TokenType::ClosingCurlyBrace, Replxx::Color::INTENSE}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Comma, Replxx::Color::INTENSE}, - {TokenType::Semicolon, Replxx::Color::INTENSE}, - {TokenType::Dot, Replxx::Color::INTENSE}, - {TokenType::Asterisk, Replxx::Color::INTENSE}, + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, Replxx::Color::INTENSE}, - {TokenType::Minus, Replxx::Color::INTENSE}, - {TokenType::Slash, Replxx::Color::INTENSE}, - {TokenType::Percent, Replxx::Color::INTENSE}, - {TokenType::Arrow, Replxx::Color::INTENSE}, - {TokenType::QuestionMark, Replxx::Color::INTENSE}, - {TokenType::Colon, Replxx::Color::INTENSE}, - {TokenType::Equals, Replxx::Color::INTENSE}, - {TokenType::NotEquals, Replxx::Color::INTENSE}, - {TokenType::Less, Replxx::Color::INTENSE}, - {TokenType::Greater, Replxx::Color::INTENSE}, - {TokenType::LessOrEquals, Replxx::Color::INTENSE}, - {TokenType::GreaterOrEquals, Replxx::Color::INTENSE}, - {TokenType::Concatenation, Replxx::Color::INTENSE}, - {TokenType::At, Replxx::Color::INTENSE}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, 
replxx::color::bold(Replxx::Color::DEFAULT)}, {TokenType::DoubleAt, Replxx::Color::MAGENTA}, {TokenType::EndOfStream, Replxx::Color::DEFAULT}, @@ -142,7 +142,7 @@ void highlight(const String & query, std::vector & colors {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - { TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED }}; + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; const Replxx::Color unknown_token_color = Replxx::Color::RED; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 1531e6c1e91..3c920e8cabe 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -10,8 +10,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -25,8 +25,8 @@ #include "Core/Block.h" #include #include -#include -#include +#include +#include #include #include #include @@ -1017,9 +1017,9 @@ Progress Connection::receiveProgress() const } -BlockStreamProfileInfo Connection::receiveProfileInfo() const +ProfileInfo Connection::receiveProfileInfo() const { - BlockStreamProfileInfo profile_info; + ProfileInfo profile_info; profile_info.read(*in); return profile_info; } diff --git a/src/Client/Connection.h b/src/Client/Connection.h index b6054941aeb..17d6b41a2e5 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -60,6 +60,8 @@ public: ~Connection() override; + IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::SERVER; } + static ServerConnectionPtr createConnection(const ConnectionParameters & parameters, ContextPtr context); /// Set throttler of network traffic. One throttler could be used for multiple connections to limit total traffic. 
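IServerConnection now carries a Type (SERVER vs LOCAL), implemented by Connection above and by LocalConnection later in this diff; the client uses it, for instance, to skip the receive-timeout check for an in-process connection (the break_on_timeout flag in the ClientBase.cpp hunk earlier). A condensed restatement of that dispatch:

/// Condensed from the ClientBase::receiveResult() change above:
/// a LOCAL (in-process) connection is never timed out on receive.
bool break_on_timeout = connection->getConnectionType() != IServerConnection::Type::LOCAL;

double elapsed = receive_watch.elapsedSeconds();
if (break_on_timeout && elapsed > receive_timeout.totalSeconds())
{
    /// ... report the timeout and cancel the query, as in the original code ...
}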
@@ -255,7 +257,7 @@ private: std::vector receiveMultistringMessage(UInt64 msg_type) const; std::unique_ptr receiveException() const; Progress receiveProgress() const; - BlockStreamProfileInfo receiveProfileInfo() const; + ProfileInfo receiveProfileInfo() const; void initInputBuffers(); void initBlockInput(); diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 5584cf72bbf..9d6b54ef32f 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -6,10 +6,9 @@ #include #include -#include -#include +#include -#include +#include #include #include @@ -31,7 +30,7 @@ struct Packet std::unique_ptr exception; std::vector multistring_message; Progress progress; - BlockStreamProfileInfo profile_info; + ProfileInfo profile_info; std::vector part_uuids; Packet() : type(Protocol::Server::Hello) {} @@ -57,6 +56,14 @@ class IServerConnection : boost::noncopyable public: virtual ~IServerConnection() = default; + enum class Type + { + SERVER, + LOCAL + }; + + virtual Type getConnectionType() const = 0; + virtual void setDefaultDatabase(const String & database) = 0; virtual void getServerVersion( diff --git a/src/DataStreams/InternalTextLogs.cpp b/src/Client/InternalTextLogs.cpp similarity index 58% rename from src/DataStreams/InternalTextLogs.cpp rename to src/Client/InternalTextLogs.cpp index a5883d17f28..430ba6daf0a 100644 --- a/src/DataStreams/InternalTextLogs.cpp +++ b/src/Client/InternalTextLogs.cpp @@ -1,6 +1,7 @@ -#include "InternalTextLogs.h" +#include #include #include +#include #include #include #include @@ -13,7 +14,7 @@ namespace DB { -void InternalTextLogs::write(const Block & block) +void InternalTextLogs::writeLogs(const Block & block) { const auto & array_event_time = typeid_cast(*block.getByName("event_time").column).getData(); const auto & array_microseconds = typeid_cast(*block.getByName("event_time_microseconds").column).getData(); @@ -97,4 +98,69 @@ void InternalTextLogs::write(const Block & block) } } +void InternalTextLogs::writeProfileEvents(const Block & block) +{ + const auto & column_host_name = typeid_cast(*block.getByName("host_name").column); + const auto & array_current_time = typeid_cast(*block.getByName("current_time").column).getData(); + const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); + const auto & array_type = typeid_cast(*block.getByName("type").column).getData(); + const auto & column_name = typeid_cast(*block.getByName("name").column); + const auto & array_value = typeid_cast(*block.getByName("value").column).getData(); + + for (size_t row_num = 0; row_num < block.rows(); ++row_num) + { + /// host_name + auto host_name = column_host_name.getDataAt(row_num); + if (host_name.size) + { + writeCString("[", wb); + if (color) + writeString(setColor(StringRefHash()(host_name)), wb); + writeString(host_name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString("] ", wb); + } + + /// current_time + auto current_time = array_current_time[row_num]; + writeDateTimeText<'.', ':'>(current_time, wb); + + /// thread_id + UInt64 thread_id = array_thread_id[row_num]; + writeCString(" [ ", wb); + if (color) + writeString(setColor(intHash64(thread_id)), wb); + writeIntText(thread_id, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(" ] ", wb); + + /// name + auto name = column_name.getDataAt(row_num); + if (color) + writeString(setColor(StringRefHash()(name)), wb); + DB::writeString(name, wb); + if (color) + writeCString(resetColor(), wb); + writeCString(": ", 
wb); + + /// value + UInt64 value = array_value[row_num]; + writeIntText(value, wb); + + //// type + Int8 type = array_type[row_num]; + writeCString(" (", wb); + if (color) + writeString(setColor(intHash64(type)), wb); + writeString(toString(ProfileEvents::TypeEnum->castToName(type)), wb); + if (color) + writeCString(resetColor(), wb); + writeCString(")", wb); + + writeChar('\n', wb); + } +} + } diff --git a/src/Client/InternalTextLogs.h b/src/Client/InternalTextLogs.h new file mode 100644 index 00000000000..0690211fd24 --- /dev/null +++ b/src/Client/InternalTextLogs.h @@ -0,0 +1,51 @@ +#pragma once +#include +#include + + +namespace DB +{ + +/// Prints internal server logs or profile events with colored output (if requested). +/// NOTE: IRowOutputFormat does not suite well for this case +class InternalTextLogs +{ +public: + InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {} + + /// Print internal server logs + /// + /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock(): + /// - event_time + /// - event_time_microseconds + /// - host_name + /// - query_id + /// - thread_id + /// - priority + /// - source + /// - text + void writeLogs(const Block & block); + /// Print profile events. + /// + /// Block: + /// - host_name + /// - current_time + /// - thread_id + /// - type + /// - name + /// - value + /// + /// See also TCPHandler::sendProfileEvents() for block columns. + void writeProfileEvents(const Block & block); + + void flush() + { + wb.next(); + } + +private: + WriteBuffer & wb; + bool color; +}; + +} diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index efd302622dd..55d3a2d6a5f 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -5,7 +5,7 @@ #include #include #include -#include "Core/Protocol.h" +#include namespace DB @@ -60,15 +60,15 @@ void LocalConnection::updateProgress(const Progress & value) void LocalConnection::sendQuery( const ConnectionTimeouts &, - const String & query_, - const String & query_id_, - UInt64, + const String & query, + const String & query_id, + UInt64 stage, const Settings *, const ClientInfo *, bool) { query_context = session.makeQueryContext(); - query_context->setCurrentQueryId(query_id_); + query_context->setCurrentQueryId(query_id); if (send_progress) query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); @@ -77,8 +77,9 @@ void LocalConnection::sendQuery( state.reset(); state.emplace(); - state->query_id = query_id_; - state->query = query_; + state->query_id = query_id; + state->query = query; + state->stage = QueryProcessingStage::Enum(stage); if (send_progress) state->after_send_progress.restart(); @@ -104,6 +105,16 @@ void LocalConnection::sendQuery( state->pushing_executor->start(); state->block = state->pushing_executor->getHeader(); } + + const auto & table_id = query_context->getInsertionTable(); + if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) + { + if (!table_id.empty()) + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, query_context); + state->columns_description = storage_ptr->getInMemoryMetadataPtr()->getColumns(); + } + } } else if (state->io.pipeline.pulling()) { @@ -116,7 +127,9 @@ void LocalConnection::sendQuery( executor.execute(); } - if (state->block) + if (state->columns_description) + next_packet_type = Protocol::Server::TableColumns; + else if (state->block) next_packet_type = Protocol::Server::Data; 
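The new src/Client/InternalTextLogs.h above exposes two entry points over a caller-supplied WriteBuffer: writeLogs() for server text-log blocks and writeProfileEvents() for ProfileEvents blocks, plus flush(), which is just wb.next(). A hedged usage sketch matching how ClientBase drives it; the output buffer and the two block variables are assumptions/placeholders, not code from this diff.

/// Sketch of driving the client-side sink (compare onLogData()/onProfileEvents() above).
WriteBufferFromFileDescriptor out(STDERR_FILENO);           /// assumed destination
InternalTextLogs logs_out_stream(out, /* color_ = */ true);

/// server_log_block / profile_events_block: placeholders for blocks received from the server,
/// with the column layouts documented in InternalTextLogs.h.
logs_out_stream.writeLogs(server_log_block);
logs_out_stream.writeProfileEvents(profile_events_block);
logs_out_stream.flush();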
} catch (const Exception & e) @@ -266,13 +279,6 @@ bool LocalConnection::poll(size_t) } } - if (state->is_finished && send_progress && !state->sent_progress) - { - state->sent_progress = true; - next_packet_type = Protocol::Server::Progress; - return true; - } - if (state->is_finished) { finishQuery(); @@ -292,7 +298,8 @@ bool LocalConnection::pollImpl() { Block block; auto next_read = pullBlock(block); - if (block) + + if (block && !state->io.null_format) { state->block.emplace(block); } @@ -336,21 +343,41 @@ Packet LocalConnection::receivePacket() packet.block = std::move(state->block.value()); state->block.reset(); } + next_packet_type.reset(); + break; + } + case Protocol::Server::TableColumns: + { + if (state->columns_description) + { + /// Send external table name (empty name is the main table) + /// (see TCPHandler::sendTableColumns) + packet.multistring_message = {"", state->columns_description->toString()}; + } + + if (state->block) + { + next_packet_type = Protocol::Server::Data; + } + break; } case Protocol::Server::Exception: { packet.exception = std::make_unique(*state->exception); + next_packet_type.reset(); break; } case Protocol::Server::Progress: { packet.progress = std::move(state->progress); state->progress.reset(); + next_packet_type.reset(); break; } case Protocol::Server::EndOfStream: { + next_packet_type.reset(); break; } default: @@ -358,7 +385,6 @@ Packet LocalConnection::receivePacket() "Unknown packet {} for {}", toString(packet.type), getDescription()); } - next_packet_type.reset(); return packet; } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index dcea3ed0fc3..1cc23defa6e 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -2,9 +2,10 @@ #include "Connection.h" #include -#include +#include #include #include +#include namespace DB @@ -33,6 +34,7 @@ struct LocalQueryState /// Current block to be sent next. 
std::optional block; + std::optional columns_description; /// Is request cancelled bool is_cancelled = false; @@ -56,6 +58,8 @@ public: ~LocalConnection() override; + IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; } + static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false); void setDefaultDatabase(const String & database) override; @@ -76,7 +80,7 @@ public: void sendQuery( const ConnectionTimeouts & timeouts, const String & query, - const String & query_id_/* = "" */, + const String & query_id/* = "" */, UInt64 stage/* = QueryProcessingStage::Complete */, const Settings * settings/* = nullptr */, const ClientInfo * client_info/* = nullptr */, diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index d8c5ced4b6b..dc236fafbd9 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -237,6 +237,26 @@ ColumnPtr ColumnDecimal::filter(const IColumn::Filter & filt, ssize_t result_ const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); +#ifdef __SSE2__ + static constexpr size_t SIMD_BYTES = 16; + const __m128i zero16 = _mm_setzero_si128(); + const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_sse) + { + UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast(filt_pos)), zero16)); + mask = ~mask; + while (mask) + { + size_t index = __builtin_ctz(mask); + res_data.push_back(*(data_pos + index)); + mask = mask & (mask - 1); + } + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } +#endif + while (filt_pos < filt_end) { if (*filt_pos) diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 94127fa8eb3..729d82da41d 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -231,12 +231,79 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result const UInt8 * filt_end = filt_pos + col_size; const UInt8 * data_pos = chars.data(); -#ifdef __SSE2__ /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. 
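The SIMD branches above all share one idiom for sparse masks: take the index of the lowest set bit with a count-trailing-zeros intrinsic, then clear that bit. A tiny self-contained illustration of the same loop in plain integer code:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        /// Pretend this came from _mm_movemask_epi8: rows 0, 2, 13 and 15 pass the filter.
        uint32_t mask = 0b1010'0000'0000'0101;
        while (mask)
        {
            size_t index = __builtin_ctz(mask); /// position of the lowest set bit
            std::printf("row %zu passes\n", index);
            mask &= mask - 1;                   /// clear the lowest set bit (what _blsr_u32 does)
        }
        return 0;
    }
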
*/ +#if defined(__AVX512F__) && defined(__AVX512BW__) + static constexpr size_t SIMD_BYTES = 64; + const __m512i zero64 = _mm512_setzero_epi32(); + const UInt8 * filt_end_avx512 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; + const size_t chars_per_simd_elements = SIMD_BYTES * n; + + while (filt_pos < filt_end_avx512) + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); + + if (0xFFFFFFFFFFFFFFFF == mask) + { + res->chars.insert(data_pos, data_pos + chars_per_simd_elements); + } + else + { + size_t res_chars_size = res->chars.size(); + while (mask) + { + size_t index = __builtin_ctzll(mask); + res->chars.resize(res_chars_size + n); + memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n); + res_chars_size += n; + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + data_pos += chars_per_simd_elements; + filt_pos += SIMD_BYTES; + } +#elif defined(__AVX__) && defined(__AVX2__) + static constexpr size_t SIMD_BYTES = 32; + const __m256i zero32 = _mm256_setzero_si256(); + const UInt8 * filt_end_avx2 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES; + const size_t chars_per_simd_elements = SIMD_BYTES * n; + + while (filt_pos < filt_end_avx2) + { + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + + if (0xFFFFFFFF == mask) + { + res->chars.insert(data_pos, data_pos + chars_per_simd_elements); + } + else + { + size_t res_chars_size = res->chars.size(); + while (mask) + { + size_t index = __builtin_ctz(mask); + res->chars.resize(res_chars_size + n); + memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n); + res_chars_size += n; + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + data_pos += chars_per_simd_elements; + filt_pos += SIMD_BYTES; + } + +#elif defined(__SSE2__) static constexpr size_t SIMD_BYTES = 16; const __m128i zero16 = _mm_setzero_si128(); @@ -267,6 +334,7 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result data_pos += chars_per_simd_elements; filt_pos += SIMD_BYTES; } + #endif size_t res_chars_size = res->chars.size(); diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 3ee692a3ff4..4793b6bd9d5 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -310,14 +310,74 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s const UInt8 * filt_pos = filt.data(); const UInt8 * filt_end = filt_pos + size; const T * data_pos = data.data(); - -#ifdef __SSE2__ /** A slightly more optimized version. * Based on the assumption that often pieces of consecutive values * completely pass or do not pass the filter. * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. 
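The structure of these filter loops is easier to see in scalar form: a chunk whose rows all pass is copied in one shot, anything else falls back to per-row copies, and a plain tail loop handles the remainder. A hedged scalar analog (illustrative, not the actual implementation):

    #include <cstdint>
    #include <vector>

    static constexpr size_t SIMD_BYTES = 64; /// chunk of filter bytes handled at once

    void filterChunked(const uint8_t * filt, const uint64_t * data, size_t size,
                       std::vector<uint64_t> & res_data)
    {
        size_t i = 0;
        for (; i + SIMD_BYTES <= size; i += SIMD_BYTES)
        {
            bool all_pass = true;
            for (size_t j = 0; j < SIMD_BYTES; ++j)
                all_pass &= (filt[i + j] != 0);

            if (all_pass)
                res_data.insert(res_data.end(), data + i, data + i + SIMD_BYTES); /// whole chunk passes
            else
                for (size_t j = 0; j < SIMD_BYTES; ++j)
                    if (filt[i + j])
                        res_data.push_back(data[i + j]); /// copy selected rows only
        }

        for (; i < size; ++i) /// scalar tail, same role as the loop after #endif
            if (filt[i])
                res_data.push_back(data[i]);
    }
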
*/ +#if defined(__AVX512F__) && defined(__AVX512BW__) + static constexpr size_t SIMD_BYTES = 64; + const __m512i zero64 = _mm512_setzero_epi32(); + const UInt8 * filt_end_avx512 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + while (filt_pos < filt_end_avx512) + { + UInt64 mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero64, _MM_CMPINT_GT); + + if (0xFFFFFFFFFFFFFFFF == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = __builtin_ctzll(mask); + res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } + +#elif defined(__AVX__) && defined(__AVX2__) + static constexpr size_t SIMD_BYTES = 32; + const __m256i zero32 = _mm256_setzero_si256(); + const UInt8 * filt_end_avx2 = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_avx2) + { + UInt32 mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero32)); + + if (0xFFFFFFFF == mask) + { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } + else + { + while (mask) + { + size_t index = __builtin_ctz(mask); + res_data.push_back(data_pos[index]); + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } + +#elif defined(__SSE2__) static constexpr size_t SIMD_BYTES = 16; const __m128i zero16 = _mm_setzero_si128(); const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; @@ -344,6 +404,7 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s filt_pos += SIMD_BYTES; data_pos += SIMD_BYTES; } + #endif while (filt_pos < filt_end) diff --git a/src/Columns/ColumnsCommon.cpp b/src/Columns/ColumnsCommon.cpp index a4d7de34382..ea5ca4e45b4 100644 --- a/src/Columns/ColumnsCommon.cpp +++ b/src/Columns/ColumnsCommon.cpp @@ -229,7 +229,89 @@ namespace memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T)); }; - #ifdef __SSE2__ + #if defined(__AVX512F__) && defined(__AVX512BW__) + const __m512i zero_vec = _mm512_setzero_epi32(); + static constexpr size_t SIMD_BYTES = 64; + const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_aligned) + { + uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(filt_pos)), zero_vec, _MM_CMPINT_GT); + + if (mask == 0xffffffffffffffff) + { + /// SIMD_BYTES consecutive rows pass the filter + const auto first = offsets_pos == offsets_begin; + + const auto chunk_offset = first ? 
0 : offsets_pos[-1]; + const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; + + result_offsets_builder.template insertChunk(offsets_pos, first, chunk_offset, chunk_size); + + /// copy elements for SIMD_BYTES arrays at once + const auto elems_size_old = res_elems.size(); + res_elems.resize(elems_size_old + chunk_size); + memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T)); + } + else + { + while (mask) + { + size_t index = __builtin_ctzll(mask); + copy_array(offsets_pos + index); + #ifdef __BMI__ + mask = _blsr_u64(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + offsets_pos += SIMD_BYTES; + } + #elif defined(__AVX__) && defined(__AVX2__) + const __m256i zero_vec = _mm256_setzero_si256(); + static constexpr size_t SIMD_BYTES = 32; + const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_aligned) + { + uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast(filt_pos)), zero_vec)); + + if (mask == 0xffffffff) + { + /// SIMD_BYTES consecutive rows pass the filter + const auto first = offsets_pos == offsets_begin; + + const auto chunk_offset = first ? 0 : offsets_pos[-1]; + const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; + + result_offsets_builder.template insertChunk(offsets_pos, first, chunk_offset, chunk_size); + + /// copy elements for SIMD_BYTES arrays at once + const auto elems_size_old = res_elems.size(); + res_elems.resize(elems_size_old + chunk_size); + memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T)); + } + else + { + while (mask) + { + size_t index = __builtin_ctz(mask); + copy_array(offsets_pos + index); + #ifdef __BMI__ + mask = _blsr_u32(mask); + #else + mask = mask & (mask-1); + #endif + } + } + + filt_pos += SIMD_BYTES; + offsets_pos += SIMD_BYTES; + } + #elif defined(__SSE2__) const __m128i zero_vec = _mm_setzero_si128(); static constexpr size_t SIMD_BYTES = 16; const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index b6d9b65c28b..1aff1460125 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -589,6 +589,8 @@ M(619, POSTGRESQL_REPLICATION_INTERNAL_ERROR) \ M(620, QUERY_NOT_ALLOWED) \ M(621, CANNOT_NORMALIZE_STRING) \ + M(622, CANNOT_PARSE_CAPN_PROTO_SCHEMA) \ + M(623, CAPN_PROTO_BAD_CAST) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 0fe40b306cb..bf3397f50e1 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -1,11 +1,14 @@ #include "ProgressIndication.h" +#include #include #include #include #include #include +#include "Common/formatReadable.h" #include #include +#include "IO/WriteBufferFromString.h" #include @@ -113,16 +116,17 @@ UInt64 ProgressIndication::getApproximateCoresNumber() const }); } -UInt64 ProgressIndication::getMemoryUsage() const +ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const { - return std::accumulate(thread_data.cbegin(), thread_data.cend(), ZERO, - [](UInt64 acc, auto const & host_data) + return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{}, + [](MemoryUsage const & acc, auto const & host_data) { - return acc + std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO, + auto host_usage = 
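The offsets arithmetic used above for array columns can be checked with a tiny example (illustrative values): offsets are cumulative, so a run of consecutive arrays maps onto one contiguous slice of the flat element buffer.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main()
    {
        /// Three arrays [1,2], [3], [4,5,6] stored as flat elements + cumulative offsets.
        std::vector<uint64_t> elems   = {1, 2, 3, 4, 5, 6};
        std::vector<uint64_t> offsets = {2, 3, 6};

        size_t first = 1, last = 2; /// copy arrays 1..2 as a single chunk
        uint64_t chunk_offset = (first == 0) ? 0 : offsets[first - 1];
        uint64_t chunk_size = offsets[last] - chunk_offset;

        /// Prints: copy 4 elements starting at flat index 2
        std::printf("copy %llu elements starting at flat index %llu\n",
                    static_cast<unsigned long long>(chunk_size),
                    static_cast<unsigned long long>(chunk_offset));
        return 0;
    }
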
std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO, [](UInt64 memory, auto const & data) { return memory + data.second.memory_usage; }); + return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)}; }); } @@ -189,6 +193,27 @@ void ProgressIndication::writeProgress() written_progress_chars = message.count() - prefix_size - (strlen(indicator) - 2); /// Don't count invisible output (escape sequences). + // If approximate cores number is known, display it. + auto cores_number = getApproximateCoresNumber(); + std::string profiling_msg; + if (cores_number != 0 && print_hardware_utilization) + { + WriteBufferFromOwnString profiling_msg_builder; + // Calculated cores number may be not accurate + // so it's better to print min(threads, cores). + UInt64 threads_number = getUsedThreadsCount(); + profiling_msg_builder << " Running " << threads_number << " threads on " + << std::min(cores_number, threads_number) << " cores"; + + auto [memory_usage, max_host_usage] = getMemoryUsage(); + if (memory_usage != 0) + profiling_msg_builder << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used"; + if (thread_data.size() > 1 && max_host_usage) + profiling_msg_builder << " total (per host max: " << formatReadableSizeWithDecimalSuffix(max_host_usage) << ")"; + profiling_msg_builder << "."; + profiling_msg = profiling_msg_builder.str(); + } + /// If the approximate number of rows to process is known, we can display a progress bar and percentage. if (progress.total_rows_to_read || progress.total_raw_bytes_to_read) { @@ -215,7 +240,7 @@ void ProgressIndication::writeProgress() if (show_progress_bar) { - ssize_t width_of_progress_bar = static_cast(terminal_width) - written_progress_chars - strlen(" 99%"); + ssize_t width_of_progress_bar = static_cast(terminal_width) - written_progress_chars - strlen(" 99%") - profiling_msg.length(); if (width_of_progress_bar > 0) { std::string bar @@ -231,23 +256,7 @@ void ProgressIndication::writeProgress() message << ' ' << (99 * current_count / max_count) << '%'; } - // If approximate cores number is known, display it. - auto cores_number = getApproximateCoresNumber(); - if (cores_number != 0) - { - // Calculated cores number may be not accurate - // so it's better to print min(threads, cores). - UInt64 threads_number = getUsedThreadsCount(); - message << " Running " << threads_number << " threads on " - << std::min(cores_number, threads_number) << " cores"; - - auto memory_usage = getMemoryUsage(); - if (memory_usage != 0) - message << " with " << formatReadableSizeWithDecimalSuffix(memory_usage) << " RAM used."; - else - message << "."; - } - + message << profiling_msg; message << CLEAR_TO_END_OF_LINE; ++increment; diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index 3d9bbc7f3ff..d31ed8df0ba 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -60,13 +60,21 @@ public: void updateThreadEventData(HostToThreadTimesMap & new_thread_data); + bool print_hardware_utilization = false; + private: size_t getUsedThreadsCount() const; UInt64 getApproximateCoresNumber() const; - UInt64 getMemoryUsage() const; + struct MemoryUsage + { + UInt64 total = 0; + UInt64 max = 0; + }; + + MemoryUsage getMemoryUsage() const; /// This flag controls whether to show the progress bar. We start showing it after /// the query has been executing for 0.5 seconds, and is still less than half complete. 
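A small sketch of the aggregation introduced above (hypothetical data, simplified container types): per host, thread memory is summed, and both the grand total and the per-host maximum are kept so the progress line can print the RAM total together with the per-host peak.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct MemoryUsage { uint64_t total = 0; uint64_t max = 0; };

    int main()
    {
        /// host -> per-thread memory usage (made-up numbers)
        std::map<std::string, std::vector<uint64_t>> thread_data =
            {{"host-1", {100, 200}}, {"host-2", {50}}};

        MemoryUsage usage;
        for (const auto & [host, threads] : thread_data)
        {
            uint64_t host_usage = 0;
            for (uint64_t memory : threads)
                host_usage += memory;
            usage.total += host_usage;
            usage.max = std::max(usage.max, host_usage);
        }

        std::printf("total=%llu, per-host max=%llu\n",
                    static_cast<unsigned long long>(usage.total),
                    static_cast<unsigned long long>(usage.max));
        return 0;
    }
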
diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 7b905937e11..aa40226093a 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -124,11 +124,13 @@ QueryProfilerBase::QueryProfilerBase(const UInt64 thread_id, const sev.sigev_notify = SIGEV_THREAD_ID; sev.sigev_signo = pause_signal; -# if defined(OS_FREEBSD) +#if defined(OS_FREEBSD) sev._sigev_un._threadid = thread_id; -# else +#elif defined(USE_MUSL) + sev.sigev_notify_thread_id = thread_id; +#else sev._sigev_un._tid = thread_id; -# endif +#endif if (timer_create(clock_type, &sev, &timer_id)) { /// In Google Cloud Run, the function "timer_create" is implemented incorrectly as of 2020-01-25. diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index e72681621cb..020f3cc4446 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -77,3 +77,6 @@ target_link_libraries (average PRIVATE clickhouse_common_io) add_executable (shell_command_inout shell_command_inout.cpp) target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io) + +add_executable (executable_udf executable_udf.cpp) +target_link_libraries (executable_udf PRIVATE dbms) diff --git a/src/Common/examples/executable_udf.cpp b/src/Common/examples/executable_udf.cpp new file mode 100644 index 00000000000..8d2d9f7314e --- /dev/null +++ b/src/Common/examples/executable_udf.cpp @@ -0,0 +1,46 @@ +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +using namespace DB; + +int main(int argc, char **argv) +{ + (void)(argc); + (void)(argv); + + std::string buffer; + + ReadBufferFromFileDescriptor read_buffer(0); + WriteBufferFromFileDescriptor write_buffer(1); + size_t rows = 0; + char dummy; + + while (!read_buffer.eof()) + { + readIntText(rows, read_buffer); + readChar(dummy, read_buffer); + + for (size_t i = 0; i < rows; ++i) + { + readString(buffer, read_buffer); + readChar(dummy, read_buffer); + + writeString("Key ", write_buffer); + writeString(buffer, write_buffer); + writeChar('\n', write_buffer); + } + + write_buffer.next(); + } + + return 0; +} diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp index 89214ad496e..f9fe8c97a14 100644 --- a/src/Common/filesystemHelpers.cpp +++ b/src/Common/filesystemHelpers.cpp @@ -118,7 +118,7 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p return absolute_path.starts_with(absolute_prefix_path); } -bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) +bool fileOrSymlinkPathStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path) { /// Differs from pathStartsWith in how `path` is normalized before comparison. 
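For context, the executable_udf example above speaks a simple line protocol: a row count, then that many values, one per line, answered by one output line per value. A hypothetical exchange (the values foo/bar/baz are made up):

    input written to the process:      output read back from it:
    3                                  Key foo
    foo                                Key bar
    bar                                Key baz
    baz
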
/// Make `path` absolute if it was relative and put it into normalized form: remove @@ -140,13 +140,14 @@ bool pathStartsWith(const String & path, const String & prefix_path) return pathStartsWith(filesystem_path, filesystem_prefix_path); } -bool symlinkStartsWith(const String & path, const String & prefix_path) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path) { auto filesystem_path = std::filesystem::path(path); auto filesystem_prefix_path = std::filesystem::path(prefix_path); - return symlinkStartsWith(filesystem_path, filesystem_prefix_path); + return fileOrSymlinkPathStartsWith(filesystem_path, filesystem_prefix_path); } + } diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h index de5802cde6d..fc3a4f15573 100644 --- a/src/Common/filesystemHelpers.h +++ b/src/Common/filesystemHelpers.h @@ -35,8 +35,9 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p /// Returns true if path starts with prefix path bool pathStartsWith(const String & path, const String & prefix_path); -/// Returns true if symlink starts with prefix path -bool symlinkStartsWith(const String & path, const String & prefix_path); +/// Same as pathStartsWith, but without canonization, i.e. allowed to check symlinks. +/// (Path is made absolute and normalized.) +bool fileOrSymlinkPathStartsWith(const String & path, const String & prefix_path); } diff --git a/src/Common/malloc.cpp b/src/Common/malloc.cpp index ec472d5d1d6..88281d9c80e 100644 --- a/src/Common/malloc.cpp +++ b/src/Common/malloc.cpp @@ -17,7 +17,9 @@ extern "C" void *aligned_alloc(size_t alignment, size_t size); void *valloc(size_t size); void *memalign(size_t alignment, size_t size); +#if !defined(USE_MUSL) void *pvalloc(size_t size); +#endif } #pragma GCC diagnostic pop @@ -39,6 +41,8 @@ static void dummyFunctionForInterposing() ignore(aligned_alloc(0, 0)); // -V575 NOLINT ignore(valloc(0)); // -V575 NOLINT ignore(memalign(0, 0)); // -V575 NOLINT +#if !defined(USE_MUSL) ignore(pvalloc(0)); // -V575 NOLINT +#endif } #endif diff --git a/src/Compression/CompressionFactory.h b/src/Compression/CompressionFactory.h index f00e5071990..2101dc28c65 100644 --- a/src/Compression/CompressionFactory.h +++ b/src/Compression/CompressionFactory.h @@ -40,13 +40,7 @@ public: CompressionCodecPtr getDefaultCodec() const; /// Validate codecs AST specified by user and parses codecs description (substitute default parameters) - ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const; - - /// Just wrapper for previous method. 
- ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const - { - return validateCodecAndGetPreprocessedAST(ast, column_type.get(), sanity_check, allow_experimental_codecs); - } + ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const; /// Validate codecs AST specified by user void validateCodec(const String & family_name, std::optional level, bool sanity_check, bool allow_experimental_codecs) const; diff --git a/src/Compression/CompressionFactoryAdditions.cpp b/src/Compression/CompressionFactoryAdditions.cpp index 9e0353e6711..b5f00c60827 100644 --- a/src/Compression/CompressionFactoryAdditions.cpp +++ b/src/Compression/CompressionFactoryAdditions.cpp @@ -53,7 +53,7 @@ void CompressionCodecFactory::validateCodec( ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( - const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const + const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const { if (const auto * func = ast->as()) { @@ -100,12 +100,13 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( if (column_type) { CompressionCodecPtr prev_codec; - IDataType::StreamCallbackWithType callback = [&]( - const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = [&](const auto & substream_path) { + assert(!substream_path.empty()); if (ISerialization::isSpecialCompressionAllowed(substream_path)) { - result_codec = getImpl(codec_family_name, codec_arguments, &substream_type); + const auto & last_type = substream_path.back().data.type; + result_codec = getImpl(codec_family_name, codec_arguments, last_type.get()); /// Case for column Tuple, which compressed with codec which depends on data type, like Delta. /// We cannot substitute parameters for such codecs. @@ -115,8 +116,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST( } }; - ISerialization::SubstreamPath stream_path; - column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path); + ISerialization::SubstreamPath path; + column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type, nullptr); if (!result_codec) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName()); diff --git a/src/Coordination/CoordinationSettings.h b/src/Coordination/CoordinationSettings.h index 805dedab89c..00d443822e6 100644 --- a/src/Coordination/CoordinationSettings.h +++ b/src/Coordination/CoordinationSettings.h @@ -28,7 +28,7 @@ struct Settings; M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ M(Milliseconds, shutdown_timeout, 5000, "How many time we will until RAFT shutdown", 0) \ - M(Milliseconds, startup_timeout, 30000, "How many time we will until RAFT to start", 0) \ + M(Milliseconds, startup_timeout, 180000, "How many time we will until RAFT to start", 0) \ M(LogsLevel, raft_logs_level, LogsLevel::information, "Log internal RAFT logs into main server log level. 
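The substream-enumeration pattern used above can be reused elsewhere; a minimal sketch (include paths assumed) that walks the default serialization of a type and inspects the data type attached to the last path element, exactly as validateCodecAndGetPreprocessedAST now does:

    #include <DataTypes/IDataType.h>
    #include <DataTypes/Serializations/ISerialization.h>
    #include <string>
    #include <vector>

    /// Collect the type names of all substreams of `column_type`.
    std::vector<std::string> substreamTypeNames(const DB::DataTypePtr & column_type)
    {
        std::vector<std::string> names;

        DB::ISerialization::StreamCallback callback = [&](const auto & substream_path)
        {
            if (!substream_path.empty() && substream_path.back().data.type)
                names.push_back(substream_path.back().data.type->getName());
        };

        DB::ISerialization::SubstreamPath path;
        column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type, nullptr);
        return names;
    }
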
Valid values: 'trace', 'debug', 'information', 'warning', 'error', 'fatal', 'none'", 0) \ M(UInt64, rotate_log_storage_interval, 100000, "How many records will be stored in one log storage file", 0) \ M(UInt64, snapshots_to_keep, 3, "How many compressed snapshots to keep on disk", 0) \ diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index a28e8d96915..ebeba5e71de 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -250,7 +250,7 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ return true; } -void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper) +void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async) { LOG_DEBUG(log, "Initializing storage dispatcher"); int myid = config.getInt("keeper_server.server_id"); @@ -271,8 +271,16 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf server->startup(); LOG_DEBUG(log, "Server initialized, waiting for quorum"); - server->waitInit(); - LOG_DEBUG(log, "Quorum initialized"); + if (!start_async) + { + server->waitInit(); + LOG_DEBUG(log, "Quorum initialized"); + } + else + { + LOG_INFO(log, "Starting Keeper asynchronously, server will accept connections to Keeper when it will be ready"); + } + } catch (...) { @@ -366,7 +374,7 @@ void KeeperDispatcher::sessionCleanerTask() try { /// Only leader node must check dead sessions - if (isLeader()) + if (server->checkInit() && isLeader()) { auto dead_sessions = server->getDeadSessions(); diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index f49063f8dea..8f19b081e26 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -100,7 +100,12 @@ public: /// Initialization from config. 
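A hedged sketch of the new asynchronous startup path (hypothetical call site; the real callers live in the server startup code): with start_async = true the dispatcher no longer blocks on quorum, so request-serving code is expected to consult checkInit() first.

    #include <Coordination/KeeperDispatcher.h>
    #include <Poco/Util/AbstractConfiguration.h>
    #include <memory>

    void startKeeperAsync(const std::shared_ptr<DB::KeeperDispatcher> & dispatcher,
                          const Poco::Util::AbstractConfiguration & config)
    {
        /// Does not wait for the RAFT quorum; Keeper starts serving requests
        /// once dispatcher->checkInit() turns true.
        dispatcher->initialize(config, /*standalone_keeper=*/ false, /*start_async=*/ true);
    }
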
/// standalone_keeper -- we are standalone keeper application (not inside clickhouse server) - void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper); + void initialize(const Poco::Util::AbstractConfiguration & config, bool standalone_keeper, bool start_async); + + bool checkInit() const + { + return server && server->checkInit(); + } /// Shutdown internal keeper parts (server, state machine, log storage, etc) void shutdown(); diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index b27170c8ba1..56d28058991 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -353,6 +353,7 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ void KeeperServer::waitInit() { std::unique_lock lock(initialized_mutex); + int64_t timeout = coordination_settings->startup_timeout.totalMilliseconds(); if (!initialized_cv.wait_for(lock, std::chrono::milliseconds(timeout), [&] { return initialized_flag.load(); })) throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization"); diff --git a/src/Coordination/KeeperServer.h b/src/Coordination/KeeperServer.h index a7e96156dc1..8e10d053471 100644 --- a/src/Coordination/KeeperServer.h +++ b/src/Coordination/KeeperServer.h @@ -80,6 +80,12 @@ public: /// Wait server initialization (see callbackFunc) void waitInit(); + /// Return true if KeeperServer initialized + bool checkInit() const + { + return initialized_flag; + } + void shutdown(); int getServerID() const { return server_id; } diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index a59ac60155e..4aaa735e52d 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -702,9 +702,32 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column current_column = current_column->decompress(); if (column.isSubcolumn()) - return column.getTypeInStorage()->getSubcolumn(column.getSubcolumnName(), *current_column); + return column.getTypeInStorage()->getSubcolumn(column.getSubcolumnName(), current_column); return current_column; } + +Block materializeBlock(const Block & block) +{ + if (!block) + return block; + + Block res = block; + size_t columns = res.columns(); + for (size_t i = 0; i < columns; ++i) + { + auto & element = res.getByPosition(i); + element.column = element.column->convertToFullColumnIfConst(); + } + + return res; +} + +void materializeBlockInplace(Block & block) +{ + for (size_t i = 0; i < block.columns(); ++i) + block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); +} + } diff --git a/src/Core/Block.h b/src/Core/Block.h index a7e3cee194b..e0a032094f6 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -196,4 +196,8 @@ void getBlocksDifference(const Block & lhs, const Block & rhs, std::string & out /// Properly handles cases, when column is a subcolumn and when it is compressed. ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column); +/// Converts columns-constants to full columns ("materializes" them). 
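To illustrate the helper moved into Block.cpp above, a small hedged example (include paths assumed): a block holding a constant column becomes a block with a full column of the same size after materializeBlockInplace.

    #include <Core/Block.h>
    #include <Core/Field.h>
    #include <DataTypes/DataTypesNumber.h>

    DB::Block constantsExample()
    {
        using namespace DB;

        auto type = std::make_shared<DataTypeUInt64>();
        ColumnPtr column = type->createColumnConst(3, Field(UInt64(42))); /// 3 identical values
        Block block{{column, type, "x"}};

        materializeBlockInplace(block); /// "x" is now a full ColumnUInt64 of size 3
        return block;
    }
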
+Block materializeBlock(const Block & block); +void materializeBlockInplace(Block & block); + } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 55ad748868e..b4adbcc0662 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -10,9 +9,8 @@ #include #include -#include +#include #include -#include #include #include #include diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a5767955045..f91bf684c85 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -625,7 +625,8 @@ class IColumn; M(Bool, cross_to_inner_join_rewrite, true, "Use inner join instead of comma/cross join if possible", 0) \ \ M(Bool, output_format_arrow_low_cardinality_as_dictionary, false, "Enable output LowCardinality type as Dictionary Arrow type", 0) \ - + \ + M(EnumComparingMode, format_capn_proto_enum_comparising_mode, FormatSettings::EnumComparingMode::BY_VALUES, "How to map ClickHouse Enum and CapnProto Enum", 0)\ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 8e588b62326..f5497588891 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -116,4 +116,9 @@ IMPLEMENT_SETTING_ENUM(ShortCircuitFunctionEvaluation, ErrorCodes::BAD_ARGUMENTS {{"enable", ShortCircuitFunctionEvaluation::ENABLE}, {"force_enable", ShortCircuitFunctionEvaluation::FORCE_ENABLE}, {"disable", ShortCircuitFunctionEvaluation::DISABLE}}) + +IMPLEMENT_SETTING_ENUM(EnumComparingMode, ErrorCodes::BAD_ARGUMENTS, + {{"by_names", FormatSettings::EnumComparingMode::BY_NAMES}, + {"by_values", FormatSettings::EnumComparingMode::BY_VALUES}, + {"by_names_case_insensitive", FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index a699da3062c..4bdb3c83ea5 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include @@ -168,4 +168,6 @@ enum class ShortCircuitFunctionEvaluation DECLARE_SETTING_ENUM(ShortCircuitFunctionEvaluation) +DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparingMode) + } diff --git a/src/DataStreams/BlockStreamProfileInfo.cpp b/src/DataStreams/BlockStreamProfileInfo.cpp deleted file mode 100644 index 09ad8a8e4ac..00000000000 --- a/src/DataStreams/BlockStreamProfileInfo.cpp +++ /dev/null @@ -1,146 +0,0 @@ -#include -#include - -#include -#include - -#include - -namespace DB -{ - -void BlockStreamProfileInfo::read(ReadBuffer & in) -{ - readVarUInt(rows, in); - readVarUInt(blocks, in); - readVarUInt(bytes, in); - readBinary(applied_limit, in); - readVarUInt(rows_before_limit, in); - readBinary(calculated_rows_before_limit, in); -} - - -void BlockStreamProfileInfo::write(WriteBuffer & out) const -{ - writeVarUInt(rows, out); - writeVarUInt(blocks, out); - writeVarUInt(bytes, out); - writeBinary(hasAppliedLimit(), out); - writeVarUInt(getRowsBeforeLimit(), out); - writeBinary(calculated_rows_before_limit, out); -} - - -void BlockStreamProfileInfo::setFrom(const BlockStreamProfileInfo & rhs, bool skip_block_size_info) -{ - if (!skip_block_size_info) - { - rows = rhs.rows; - blocks = rhs.blocks; - bytes = rhs.bytes; - } - applied_limit = rhs.applied_limit; - rows_before_limit = rhs.rows_before_limit; - calculated_rows_before_limit = rhs.calculated_rows_before_limit; -} - - -size_t 
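The three values of the new format_capn_proto_enum_comparising_mode setting above map naturally onto a comparison helper. A purely illustrative sketch (not the actual CapnProto format code) of what each mode means when matching a ClickHouse Enum element against a CapnProto enumerant:

    #include <algorithm>
    #include <cctype>
    #include <cstdint>
    #include <string>

    enum class EnumComparingMode { BY_NAMES, BY_VALUES, BY_NAMES_CASE_INSENSITIVE };

    static bool equalCaseInsensitive(std::string lhs, std::string rhs)
    {
        auto to_lower = [](std::string & s)
        {
            std::transform(s.begin(), s.end(), s.begin(),
                           [](unsigned char c) { return std::tolower(c); });
        };
        to_lower(lhs);
        to_lower(rhs);
        return lhs == rhs;
    }

    bool enumsMatch(EnumComparingMode mode,
                    const std::string & ch_name, int64_t ch_value,
                    const std::string & capnp_name, uint16_t capnp_value)
    {
        switch (mode)
        {
            case EnumComparingMode::BY_VALUES:
                return ch_value == capnp_value;
            case EnumComparingMode::BY_NAMES:
                return ch_name == capnp_name;
            case EnumComparingMode::BY_NAMES_CASE_INSENSITIVE:
                return equalCaseInsensitive(ch_name, capnp_name);
        }
        return false;
    }
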
BlockStreamProfileInfo::getRowsBeforeLimit() const -{ - if (!calculated_rows_before_limit) - calculateRowsBeforeLimit(); - return rows_before_limit; -} - - -bool BlockStreamProfileInfo::hasAppliedLimit() const -{ - if (!calculated_rows_before_limit) - calculateRowsBeforeLimit(); - return applied_limit; -} - - -void BlockStreamProfileInfo::update(Block & block) -{ - update(block.rows(), block.bytes()); -} - -void BlockStreamProfileInfo::update(size_t num_rows, size_t num_bytes) -{ - ++blocks; - rows += num_rows; - bytes += num_bytes; -} - - -void BlockStreamProfileInfo::collectInfosForStreamsWithName(const char * name, BlockStreamProfileInfos & res) const -{ - if (!parent) - return; - - if (parent->getName() == name) - { - res.push_back(this); - return; - } - - parent->forEachChild([&] (IBlockInputStream & child) - { - child.getProfileInfo().collectInfosForStreamsWithName(name, res); - return false; - }); -} - - -void BlockStreamProfileInfo::calculateRowsBeforeLimit() const -{ - calculated_rows_before_limit = true; - - /// is there a Limit? - BlockStreamProfileInfos limits; - collectInfosForStreamsWithName("Limit", limits); - - if (!limits.empty()) - { - applied_limit = true; - - /** Take the number of lines read below `PartialSorting`, if any, or below `Limit`. - * This is necessary, because sorting can return only part of the rows. - */ - BlockStreamProfileInfos partial_sortings; - collectInfosForStreamsWithName("PartialSorting", partial_sortings); - - BlockStreamProfileInfos & limits_or_sortings = partial_sortings.empty() ? limits : partial_sortings; - - for (const BlockStreamProfileInfo * info_limit_or_sort : limits_or_sortings) - { - info_limit_or_sort->parent->forEachChild([&] (IBlockInputStream & child) - { - rows_before_limit += child.getProfileInfo().rows; - return false; - }); - } - } - else - { - /// Then the data about `rows_before_limit` can be in `RemoteBlockInputStream` (come from a remote server). - BlockStreamProfileInfos remotes; - collectInfosForStreamsWithName("Remote", remotes); - collectInfosForStreamsWithName("TreeExecutor", remotes); - - if (remotes.empty()) - return; - - for (const auto & info : remotes) - { - if (info->applied_limit) - { - applied_limit = true; - rows_before_limit += info->rows_before_limit; - } - } - } -} - -} diff --git a/src/DataStreams/IBlockInputStream.cpp b/src/DataStreams/IBlockInputStream.cpp deleted file mode 100644 index e57d6903673..00000000000 --- a/src/DataStreams/IBlockInputStream.cpp +++ /dev/null @@ -1,359 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include - -namespace ProfileEvents -{ - extern const Event ThrottlerSleepMicroseconds; - extern const Event SelectedRows; - extern const Event SelectedBytes; -} - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int QUERY_WAS_CANCELLED; - extern const int TOO_MANY_ROWS; - extern const int TOO_MANY_BYTES; - extern const int TOO_MANY_ROWS_OR_BYTES; - extern const int LOGICAL_ERROR; -} - - -/// It's safe to access children without mutex as long as these methods are called before first call to `read()` or `readPrefix()`. 
- - -Block IBlockInputStream::read() -{ - if (total_rows_approx) - { - progressImpl(Progress(0, 0, total_rows_approx)); - total_rows_approx = 0; - } - - if (!info.started) - { - info.total_stopwatch.start(); - info.started = true; - } - - Block res; - - if (isCancelledOrThrowIfKilled()) - return res; - - if (!checkTimeLimit()) - limit_exceeded_need_break = true; - - if (!limit_exceeded_need_break) - res = readImpl(); - - if (res) - { - info.update(res); - - if (enabled_extremes) - updateExtremes(res); - - if (limits.mode == LimitsMode::LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES)) - limit_exceeded_need_break = true; - - if (quota) - checkQuota(res); - } - else - { - /** If the stream is over, then we will ask all children to abort the execution. - * This makes sense when running a query with LIMIT - * - there is a situation when all the necessary data has already been read, - * but children sources are still working, - * herewith they can work in separate threads or even remotely. - */ - cancel(false); - } - - progress(Progress(res.rows(), res.bytes())); - -#ifndef NDEBUG - if (res) - { - Block header = getHeader(); - if (header) - assertBlocksHaveEqualStructure(res, header, getName()); - } -#endif - - return res; -} - - -void IBlockInputStream::readPrefix() -{ -#ifndef NDEBUG - if (!read_prefix_is_called) - read_prefix_is_called = true; - else - throw Exception("readPrefix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR); -#endif - - readPrefixImpl(); - - forEachChild([&] (IBlockInputStream & child) - { - child.readPrefix(); - return false; - }); -} - - -void IBlockInputStream::readSuffix() -{ -#ifndef NDEBUG - if (!read_suffix_is_called) - read_suffix_is_called = true; - else - throw Exception("readSuffix is called twice for " + getName() + " stream", ErrorCodes::LOGICAL_ERROR); -#endif - - forEachChild([&] (IBlockInputStream & child) - { - child.readSuffix(); - return false; - }); - - readSuffixImpl(); -} - - -void IBlockInputStream::updateExtremes(Block & block) -{ - size_t num_columns = block.columns(); - - if (!extremes) - { - MutableColumns extremes_columns(num_columns); - - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnPtr & src = block.safeGetByPosition(i).column; - - if (isColumnConst(*src)) - { - /// Equal min and max. 
- extremes_columns[i] = src->cloneResized(2); - } - else - { - Field min_value; - Field max_value; - - src->getExtremes(min_value, max_value); - - extremes_columns[i] = src->cloneEmpty(); - - extremes_columns[i]->insert(min_value); - extremes_columns[i]->insert(max_value); - } - } - - extremes = block.cloneWithColumns(std::move(extremes_columns)); - } - else - { - for (size_t i = 0; i < num_columns; ++i) - { - ColumnPtr & old_extremes = extremes.safeGetByPosition(i).column; - - if (isColumnConst(*old_extremes)) - continue; - - Field min_value = (*old_extremes)[0]; - Field max_value = (*old_extremes)[1]; - - Field cur_min_value; - Field cur_max_value; - - block.safeGetByPosition(i).column->getExtremes(cur_min_value, cur_max_value); - - if (cur_min_value < min_value) - min_value = cur_min_value; - if (cur_max_value > max_value) - max_value = cur_max_value; - - MutableColumnPtr new_extremes = old_extremes->cloneEmpty(); - - new_extremes->insert(min_value); - new_extremes->insert(max_value); - - old_extremes = std::move(new_extremes); - } - } -} - - -bool IBlockInputStream::checkTimeLimit() const -{ - return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode); -} - - -void IBlockInputStream::checkQuota(Block & block) -{ - switch (limits.mode) - { - case LimitsMode::LIMITS_TOTAL: - /// Checked in `progress` method. - break; - - case LimitsMode::LIMITS_CURRENT: - { - UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds(); - quota->used({Quota::RESULT_ROWS, block.rows()}, {Quota::RESULT_BYTES, block.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed}); - prev_elapsed = total_elapsed; - break; - } - } -} - - -void IBlockInputStream::progressImpl(const Progress & value) -{ - if (progress_callback) - progress_callback(value); - - if (process_list_elem) - { - if (!process_list_elem->updateProgressIn(value)) - cancel(/* kill */ true); - - /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers. - - ProgressValues progress = process_list_elem->getProgressIn(); - size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read); - - /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read. - * NOTE: Maybe it makes sense to have them checked directly in ProcessList? 
- */ - if (limits.mode == LimitsMode::LIMITS_TOTAL) - { - if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read", - ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES)) - cancel(false); - } - - size_t total_rows = progress.total_rows_to_read; - - constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds - UInt64 total_elapsed_microseconds = info.total_stopwatch.elapsedMicroseconds(); - - if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) - { - CurrentThread::updatePerformanceCounters(); - last_profile_events_update_time = total_elapsed_microseconds; - } - - limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); - - if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) - quota->used({Quota::READ_ROWS, value.read_rows}, {Quota::READ_BYTES, value.read_bytes}); - } - - ProfileEvents::increment(ProfileEvents::SelectedRows, value.read_rows); - ProfileEvents::increment(ProfileEvents::SelectedBytes, value.read_bytes); -} - - -void IBlockInputStream::cancel(bool kill) -{ - if (kill) - is_killed = true; - - bool old_val = false; - if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed)) - return; - - forEachChild([&] (IBlockInputStream & child) - { - child.cancel(kill); - return false; - }); -} - - -bool IBlockInputStream::isCancelled() const -{ - return is_cancelled; -} - -bool IBlockInputStream::isCancelledOrThrowIfKilled() const -{ - if (!is_cancelled) - return false; - if (is_killed) - throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); - return true; -} - - -void IBlockInputStream::setProgressCallback(const ProgressCallback & callback) -{ - progress_callback = callback; - - forEachChild([&] (IBlockInputStream & child) - { - child.setProgressCallback(callback); - return false; - }); -} - - -void IBlockInputStream::setProcessListElement(QueryStatus * elem) -{ - process_list_elem = elem; - - forEachChild([&] (IBlockInputStream & child) - { - child.setProcessListElement(elem); - return false; - }); -} - - -Block IBlockInputStream::getTotals() -{ - if (totals) - return totals; - - Block res; - forEachChild([&] (IBlockInputStream & child) - { - res = child.getTotals(); - return bool(res); - }); - return res; -} - - -Block IBlockInputStream::getExtremes() -{ - if (extremes) - return extremes; - - Block res; - forEachChild([&] (IBlockInputStream & child) - { - res = child.getExtremes(); - return bool(res); - }); - return res; -} - -} diff --git a/src/DataStreams/IBlockInputStream.h b/src/DataStreams/IBlockInputStream.h deleted file mode 100644 index 0e77ba81779..00000000000 --- a/src/DataStreams/IBlockInputStream.h +++ /dev/null @@ -1,271 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ -} - -class ProcessListElement; -class EnabledQuota; -class QueryStatus; - - -/** The stream interface for reading data by blocks from the database. - * Relational operations are supposed to be done also as implementations of this interface. - * Watches out at how the source of the blocks works. - * Lets you get information for profiling: rows per second, blocks per second, megabytes per second, etc. - * Allows you to stop reading data (in nested sources). 
- */ -class IBlockInputStream : public TypePromotion -{ - friend struct BlockStreamProfileInfo; - -public: - IBlockInputStream() { info.parent = this; } - virtual ~IBlockInputStream() = default; - - IBlockInputStream(const IBlockInputStream &) = delete; - IBlockInputStream & operator=(const IBlockInputStream &) = delete; - - /// To output the data stream transformation tree (query execution plan). - virtual String getName() const = 0; - - /** Get data structure of the stream in a form of "header" block (it is also called "sample block"). - * Header block contains column names, data types, columns of size 0. Constant columns must have corresponding values. - * It is guaranteed that method "read" returns blocks of exactly that structure. - */ - virtual Block getHeader() const = 0; - - virtual const BlockMissingValues & getMissingValues() const - { - static const BlockMissingValues none; - return none; - } - - /** Read next block. - * If there are no more blocks, return an empty block (for which operator `bool` returns false). - * NOTE: Only one thread can read from one instance of IBlockInputStream simultaneously. - * This also applies for readPrefix, readSuffix. - */ - Block read(); - - /** Read something before starting all data or after the end of all data. - * In the `readSuffix` function, you can implement a finalization that can lead to an exception. - * readPrefix() must be called before the first call to read(). - * readSuffix() should be called after read() returns an empty block, or after a call to cancel(), but not during read() execution. - */ - - /** The default implementation calls readPrefixImpl() on itself, and then readPrefix() recursively for all children. - * There are cases when you do not want `readPrefix` of children to be called synchronously, in this function, - * but you want them to be called, for example, in separate threads (for parallel initialization of children). - * Then overload `readPrefix` function. - */ - virtual void readPrefix(); - - /** The default implementation calls recursively readSuffix() on all children, and then readSuffixImpl() on itself. - * If this stream calls read() in children in a separate thread, this behavior is usually incorrect: - * readSuffix() of the child can not be called at the moment when the same child's read() is executed in another thread. - * In this case, you need to override this method so that readSuffix() in children is called, for example, after connecting streams. - */ - virtual void readSuffix(); - - /// Do not allow to change the table while the blocks stream and its children are alive. - void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } - - /// Get information about execution speed. - const BlockStreamProfileInfo & getProfileInfo() const { return info; } - - /** Get "total" values. - * The default implementation takes them from itself or from the first child source in which they are. - * The overridden method can perform some calculations. For example, apply an expression to the `totals` of the child source. - * There can be no total values - then an empty block is returned. - * - * Call this method only after all the data has been retrieved with `read`, - * otherwise there will be problems if any data at the same time is computed in another thread. - */ - virtual Block getTotals(); - - /// The same for minimums and maximums. - virtual Block getExtremes(); - - - /** Set the execution progress bar callback. - * The callback is passed to all child sources. 
- * By default, it is called for leaf sources, after each block. - * (But this can be overridden in the progress() method) - * The function takes the number of rows in the last block, the number of bytes in the last block. - * Note that the callback can be called from different threads. - */ - virtual void setProgressCallback(const ProgressCallback & callback); - - - /** In this method: - * - the progress callback is called; - * - the status of the query execution in ProcessList is updated; - * - checks restrictions and quotas that should be checked not within the same source, - * but over the total amount of resources spent in all sources at once (information in the ProcessList). - */ - virtual void progress(const Progress & value) - { - /// The data for progress is taken from leaf sources. - if (children.empty()) - progressImpl(value); - } - - void progressImpl(const Progress & value); - - - /** Set the pointer to the process list item. - * It is passed to all child sources. - * General information about the resources spent on the request will be written into it. - * Based on this information, the quota and some restrictions will be checked. - * This information will also be available in the SHOW PROCESSLIST request. - */ - virtual void setProcessListElement(QueryStatus * elem); - - /** Set the approximate total number of rows to read. - */ - void addTotalRowsApprox(size_t value) { total_rows_approx += value; } - - - /** Ask to abort the receipt of data as soon as possible. - * By default - just sets the flag is_cancelled and asks that all children be interrupted. - * This function can be called several times, including simultaneously from different threads. - * Have two modes: - * with kill = false only is_cancelled is set - streams will stop silently with returning some processed data. - * with kill = true also is_killed set - queries will stop with exception. - */ - virtual void cancel(bool kill); - - bool isCancelled() const; - bool isCancelledOrThrowIfKilled() const; - - /** Set limitations that checked on each block. */ - virtual void setLimits(const StreamLocalLimits & limits_) - { - limits = limits_; - } - - const StreamLocalLimits & getLimits() const - { - return limits; - } - - /** Set the quota. If you set a quota on the amount of raw data, - * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. - */ - virtual void setQuota(const std::shared_ptr & new_quota) - { - quota = new_quota; - } - - /// Enable calculation of minimums and maximums by the result columns. - void enableExtremes() { enabled_extremes = true; } - -protected: - /// Order is important: `table_locks` must be destroyed after `children` so that tables from - /// which child streams read are protected by the locks during the lifetime of the child streams. - std::vector table_locks; - - BlockInputStreams children; - std::shared_mutex children_mutex; - - BlockStreamProfileInfo info; - std::atomic is_cancelled{false}; - std::atomic is_killed{false}; - ProgressCallback progress_callback; - QueryStatus * process_list_elem = nullptr; - /// According to total_stopwatch in microseconds - UInt64 last_profile_events_update_time = 0; - - /// Additional information that can be generated during the work process. - - /// Total values during aggregation. - Block totals; - /// Minimums and maximums. The first row of the block - minimums, the second - the maximums. 
- Block extremes; - - - void addChild(const BlockInputStreamPtr & child) - { - std::unique_lock lock(children_mutex); - children.push_back(child); - } - - /** Check limits. - * But only those that can be checked within each separate stream. - */ - bool checkTimeLimit() const; - -#ifndef NDEBUG - bool read_prefix_is_called = false; - bool read_suffix_is_called = false; -#endif - -private: - bool enabled_extremes = false; - - /// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out. - bool limit_exceeded_need_break = false; - - /// Limitations and quotas. - - StreamLocalLimits limits; - - std::shared_ptr quota; /// If nullptr - the quota is not used. - UInt64 prev_elapsed = 0; - - /// The approximate total number of rows to read. For progress bar. - size_t total_rows_approx = 0; - - /// Derived classes must implement this function. - virtual Block readImpl() = 0; - - /// Here you can do a preliminary initialization. - virtual void readPrefixImpl() {} - - /// Here you need to do a finalization, which can lead to an exception. - virtual void readSuffixImpl() {} - - void updateExtremes(Block & block); - - /** Check quotas. - * But only those that can be checked within each separate stream. - */ - void checkQuota(Block & block); - - size_t checkDepthImpl(size_t max_depth, size_t level) const; - - template - void forEachChild(F && f) - { - /// NOTE: Acquire a read lock, therefore f() should be thread safe - std::shared_lock lock(children_mutex); - - // Reduce lock scope and avoid recursive locking since that is undefined for shared_mutex. - const auto children_copy = children; - lock.unlock(); - - for (auto & child : children_copy) - if (f(*child)) - return; - } - -}; - -} diff --git a/src/DataStreams/IBlockOutputStream.h b/src/DataStreams/IBlockOutputStream.h deleted file mode 100644 index 65ebd90769d..00000000000 --- a/src/DataStreams/IBlockOutputStream.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -#include -#include -#include - - -namespace DB -{ - -struct Progress; - -/** Interface of stream for writing data (into table, filesystem, network, terminal, etc.) - */ -class IBlockOutputStream : private boost::noncopyable -{ -public: - IBlockOutputStream() = default; - - /** Get data structure of the stream in a form of "header" block (it is also called "sample block"). - * Header block contains column names, data types, columns of size 0. Constant columns must have corresponding values. - * You must pass blocks of exactly this structure to the 'write' method. - */ - virtual Block getHeader() const = 0; - - /** Write block. - */ - virtual void write(const Block & block) = 0; - - /** Write or do something before all data or after all data. - */ - virtual void writePrefix() {} - virtual void writeSuffix() {} - - /** Flush output buffers if any. - */ - virtual void flush() {} - - /** Methods to set additional information for output in formats, that support it. - */ - virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} - virtual void setTotals(const Block & /*totals*/) {} - virtual void setExtremes(const Block & /*extremes*/) {} - - /** Notify about progress. Method could be called from different threads. - * Passed value are delta, that must be summarized. - */ - virtual void onProgress(const Progress & /*progress*/) {} - - /** Content-Type to set when sending HTTP response. 
- */ - virtual std::string getContentType() const { return "text/plain; charset=UTF-8"; } - - virtual ~IBlockOutputStream() = default; - - /** Don't let to alter table while instance of stream is alive. - */ - void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } - -private: - std::vector table_locks; -}; - -} diff --git a/src/DataStreams/IBlockStream_fwd.h b/src/DataStreams/IBlockStream_fwd.h deleted file mode 100644 index d74a9528ed9..00000000000 --- a/src/DataStreams/IBlockStream_fwd.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class IBlockInputStream; -class IBlockOutputStream; - -using BlockInputStreamPtr = std::shared_ptr; -using BlockInputStreams = std::vector; -using BlockOutputStreamPtr = std::shared_ptr; -using BlockOutputStreams = std::vector; - -} diff --git a/src/DataStreams/InternalTextLogs.h b/src/DataStreams/InternalTextLogs.h deleted file mode 100644 index 1312c1d327c..00000000000 --- a/src/DataStreams/InternalTextLogs.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include -#include - - -namespace DB -{ - -/// Prints internal server logs -/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock() -/// NOTE: IRowOutputFormat does not suite well for this case -class InternalTextLogs -{ -public: - InternalTextLogs(WriteBuffer & buf_out, bool color_) : wb(buf_out), color(color_) {} - - - void write(const Block & block); - - void flush() - { - wb.next(); - } - -private: - WriteBuffer & wb; - bool color; -}; - -} diff --git a/src/DataStreams/MaterializingBlockOutputStream.h b/src/DataStreams/MaterializingBlockOutputStream.h deleted file mode 100644 index 64c2bc12a57..00000000000 --- a/src/DataStreams/MaterializingBlockOutputStream.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Converts columns-constants to full columns ("materializes" them). - */ -class MaterializingBlockOutputStream : public IBlockOutputStream -{ -public: - MaterializingBlockOutputStream(const BlockOutputStreamPtr & output_, const Block & header_) - : output{output_}, header(header_) {} - - Block getHeader() const override { return header; } - void write(const Block & block) override { output->write(materializeBlock(block)); } - void flush() override { output->flush(); } - void writePrefix() override { output->writePrefix(); } - void writeSuffix() override { output->writeSuffix(); } - void setRowsBeforeLimit(size_t rows_before_limit) override { output->setRowsBeforeLimit(rows_before_limit); } - void setTotals(const Block & totals) override { output->setTotals(materializeBlock(totals)); } - void setExtremes(const Block & extremes) override { output->setExtremes(materializeBlock(extremes)); } - void onProgress(const Progress & progress) override { output->onProgress(progress); } - String getContentType() const override { return output->getContentType(); } - -private: - BlockOutputStreamPtr output; - Block header; -}; - -} diff --git a/src/DataStreams/OneBlockInputStream.h b/src/DataStreams/OneBlockInputStream.h deleted file mode 100644 index d401082ce62..00000000000 --- a/src/DataStreams/OneBlockInputStream.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** A stream of blocks from which you can read one block. 
- */ -class OneBlockInputStream : public IBlockInputStream -{ -public: - explicit OneBlockInputStream(Block block_) : block(std::move(block_)) { block.checkNumberOfRows(); } - - String getName() const override { return "One"; } - - Block getHeader() const override - { - Block res; - for (const auto & elem : block) - res.insert({ elem.column->cloneEmpty(), elem.type, elem.name }); - return res; - } - -protected: - Block readImpl() override - { - if (has_been_read) - return Block(); - - has_been_read = true; - return block; - } - -private: - Block block; - bool has_been_read = false; -}; - -} diff --git a/src/DataStreams/SquashingBlockInputStream.cpp b/src/DataStreams/SquashingBlockInputStream.cpp deleted file mode 100644 index e13dee37008..00000000000 --- a/src/DataStreams/SquashingBlockInputStream.cpp +++ /dev/null @@ -1,32 +0,0 @@ -#include - - -namespace DB -{ - -SquashingBlockInputStream::SquashingBlockInputStream( - const BlockInputStreamPtr & src, size_t min_block_size_rows, size_t min_block_size_bytes, bool reserve_memory) - : header(src->getHeader()), transform(min_block_size_rows, min_block_size_bytes, reserve_memory) -{ - children.emplace_back(src); -} - - -Block SquashingBlockInputStream::readImpl() -{ - while (!all_read) - { - Block block = children[0]->read(); - if (!block) - all_read = true; - - auto squashed_block = transform.add(std::move(block)); - if (squashed_block) - { - return squashed_block; - } - } - return {}; -} - -} diff --git a/src/DataStreams/SquashingBlockInputStream.h b/src/DataStreams/SquashingBlockInputStream.h deleted file mode 100644 index c2732d520cc..00000000000 --- a/src/DataStreams/SquashingBlockInputStream.h +++ /dev/null @@ -1,31 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Merging consecutive blocks of stream to specified minimum size. 
- */ -class SquashingBlockInputStream : public IBlockInputStream -{ -public: - SquashingBlockInputStream(const BlockInputStreamPtr & src, size_t min_block_size_rows, size_t min_block_size_bytes, - bool reserve_memory = false); - - String getName() const override { return "Squashing"; } - - Block getHeader() const override { return header; } - -protected: - Block readImpl() override; - -private: - Block header; - SquashingTransform transform; - bool all_read = false; -}; - -} diff --git a/src/DataStreams/SquashingBlockOutputStream.cpp b/src/DataStreams/SquashingBlockOutputStream.cpp deleted file mode 100644 index ab12f66590f..00000000000 --- a/src/DataStreams/SquashingBlockOutputStream.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - - -namespace DB -{ - -SquashingBlockOutputStream::SquashingBlockOutputStream(BlockOutputStreamPtr dst, Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) - : output(std::move(dst)), header(std::move(header_)), transform(min_block_size_rows, min_block_size_bytes) -{ -} - - -void SquashingBlockOutputStream::write(const Block & block) -{ - auto squashed_block = transform.add(block); - if (squashed_block) - output->write(squashed_block); -} - - -void SquashingBlockOutputStream::finalize() -{ - if (all_written) - return; - - all_written = true; - - auto squashed_block = transform.add({}); - if (squashed_block) - output->write(squashed_block); -} - - -void SquashingBlockOutputStream::flush() -{ - if (!disable_flush) - finalize(); - output->flush(); -} - - -void SquashingBlockOutputStream::writePrefix() -{ - output->writePrefix(); -} - - -void SquashingBlockOutputStream::writeSuffix() -{ - finalize(); - output->writeSuffix(); -} - -} diff --git a/src/DataStreams/SquashingBlockOutputStream.h b/src/DataStreams/SquashingBlockOutputStream.h deleted file mode 100644 index 7828ad7e96d..00000000000 --- a/src/DataStreams/SquashingBlockOutputStream.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - - -namespace DB -{ - -/** Merging consecutive blocks of stream to specified minimum size. - */ -class SquashingBlockOutputStream : public IBlockOutputStream -{ -public: - SquashingBlockOutputStream(BlockOutputStreamPtr dst, Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); - - Block getHeader() const override { return header; } - void write(const Block & block) override; - - void flush() override; - void writePrefix() override; - void writeSuffix() override; - - /// Don't write blocks less than specified size even when flush method was called by user. 
- void disableFlush() { disable_flush = true; } - -private: - BlockOutputStreamPtr output; - Block header; - - SquashingTransform transform; - bool all_written = false; - - void finalize(); - - bool disable_flush = false; -}; - -} diff --git a/src/DataStreams/copyData.cpp b/src/DataStreams/copyData.cpp deleted file mode 100644 index a26052778a8..00000000000 --- a/src/DataStreams/copyData.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - -namespace -{ - -bool isAtomicSet(std::atomic * val) -{ - return ((val != nullptr) && val->load(std::memory_order_seq_cst)); -} - -} - -template -void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCallback && is_cancelled, TProgressCallback && progress) -{ - from.readPrefix(); - to.writePrefix(); - - while (Block block = from.read()) - { - if (is_cancelled()) - break; - - to.write(block); - progress(block); - } - - if (is_cancelled()) - return; - - /// For outputting additional information in some formats. - if (from.getProfileInfo().hasAppliedLimit()) - to.setRowsBeforeLimit(from.getProfileInfo().getRowsBeforeLimit()); - - to.setTotals(from.getTotals()); - to.setExtremes(from.getExtremes()); - - if (is_cancelled()) - return; - - from.readSuffix(); - to.writeSuffix(); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & progress, - std::atomic * is_cancelled) -{ - auto is_cancelled_pred = [is_cancelled] () - { - return isAtomicSet(is_cancelled); - }; - - copyDataImpl(from, to, is_cancelled_pred, progress); -} - -inline void doNothing(const Block &) {} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled) -{ - auto is_cancelled_pred = [is_cancelled] () - { - return isAtomicSet(is_cancelled); - }; - - copyDataImpl(from, to, is_cancelled_pred, doNothing); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled) -{ - copyDataImpl(from, to, is_cancelled, doNothing); -} - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, - const std::function & progress) -{ - copyDataImpl(from, to, is_cancelled, progress); -} - -} diff --git a/src/DataStreams/copyData.h b/src/DataStreams/copyData.h deleted file mode 100644 index 3dc90aed37d..00000000000 --- a/src/DataStreams/copyData.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include - -#include -#include - - -namespace DB -{ - -class Block; - -/** Copies data from the InputStream into the OutputStream - * (for example, from the database to the console, etc.) 
- */ -void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled = nullptr); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & progress, - std::atomic * is_cancelled = nullptr); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled); - -void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, - const std::function & progress); - -} diff --git a/src/DataStreams/finalizeBlock.cpp b/src/DataStreams/finalizeBlock.cpp deleted file mode 100644 index 56068edcc29..00000000000 --- a/src/DataStreams/finalizeBlock.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ - void finalizeBlock(Block & block) - { - for (size_t i = 0; i < block.columns(); ++i) - { - ColumnWithTypeAndName & current = block.getByPosition(i); - const DataTypeAggregateFunction * unfinalized_type = typeid_cast(current.type.get()); - - if (unfinalized_type) - { - current.type = unfinalized_type->getReturnType(); - if (current.column) - { - auto mut_column = IColumn::mutate(std::move(current.column)); - current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); - } - } - } - } -} diff --git a/src/DataStreams/finalizeBlock.h b/src/DataStreams/finalizeBlock.h deleted file mode 100644 index 3c81ddae1c7..00000000000 --- a/src/DataStreams/finalizeBlock.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - /// Converts aggregate function columns with non-finalized states to final values - void finalizeBlock(Block & block); -} diff --git a/src/DataStreams/materializeBlock.cpp b/src/DataStreams/materializeBlock.cpp deleted file mode 100644 index 6b47cb87baa..00000000000 --- a/src/DataStreams/materializeBlock.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include - - -namespace DB -{ - -Block materializeBlock(const Block & block) -{ - if (!block) - return block; - - Block res = block; - size_t columns = res.columns(); - for (size_t i = 0; i < columns; ++i) - { - auto & element = res.getByPosition(i); - element.column = element.column->convertToFullColumnIfConst(); - } - - return res; -} - -void materializeBlockInplace(Block & block) -{ - for (size_t i = 0; i < block.columns(); ++i) - block.getByPosition(i).column = block.getByPosition(i).column->convertToFullColumnIfConst(); -} - -} diff --git a/src/DataStreams/materializeBlock.h b/src/DataStreams/materializeBlock.h deleted file mode 100644 index 5e1499319c1..00000000000 --- a/src/DataStreams/materializeBlock.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include - - -namespace DB -{ - -/** Converts columns-constants to full columns ("materializes" them). 
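Putting the removed pieces together: a sketch of the pull-based pattern that copyData() used to drive, composed only from classes deleted in this diff; the block-size thresholds and the helper name writeSquashed are illustrative.

#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/copyData.h>

namespace DB
{

void writeSquashed(const Block & block, const BlockOutputStreamPtr & destination)
{
    /// Wrap a single block into an input stream.
    BlockInputStreamPtr in = std::make_shared<OneBlockInputStream>(block);

    /// Merge small blocks before writing them to the destination.
    auto out = std::make_shared<SquashingBlockOutputStream>(
        destination, in->getHeader(), /*min_block_size_rows=*/65536, /*min_block_size_bytes=*/1048576);

    /// copyData() runs readPrefix/writePrefix, the read/write loop and readSuffix/writeSuffix.
    copyData(*in, *out);
}

}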
- */ -Block materializeBlock(const Block & block); -void materializeBlockInplace(Block & block); - -} diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index f7ae3170119..5c4b94ad823 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -3,8 +3,6 @@ #include -#include -#include #include #include diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index bcf3a9c1f57..f78aebd2d99 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -1,17 +1,9 @@ #include -#include -#include -#include -#include - #include -#include #include #include #include -#include -#include #include @@ -53,69 +45,6 @@ bool DataTypeArray::equals(const IDataType & rhs) const return typeid(rhs) == typeid(*this) && nested->equals(*static_cast(rhs).nested); } -DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) const -{ - return tryGetSubcolumnTypeImpl(subcolumn_name, 0); -} - -DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const -{ - if (subcolumn_name == "size" + std::to_string(level)) - return std::make_shared(); - - DataTypePtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->tryGetSubcolumnTypeImpl(subcolumn_name, level + 1); - else - subcolumn = nested->tryGetSubcolumnType(subcolumn_name); - - if (subcolumn && subcolumn_name != MAIN_SUBCOLUMN_NAME) - subcolumn = std::make_shared(std::move(subcolumn)); - - return subcolumn; -} - -ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - return getSubcolumnImpl(subcolumn_name, column, 0); -} - -ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const -{ - const auto & column_array = assert_cast(column); - if (subcolumn_name == "size" + std::to_string(level)) - return arrayOffsetsToSizes(column_array.getOffsetsColumn()); - - ColumnPtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1); - else - subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData()); - - return ColumnArray::create(subcolumn, column_array.getOffsetsPtr()); -} - -SerializationPtr DataTypeArray::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0); -} - -SerializationPtr DataTypeArray::getSubcolumnSerializationImpl( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const -{ - if (subcolumn_name == "size" + std::to_string(level)) - return std::make_shared(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false); - - SerializationPtr subcolumn; - if (const auto * nested_array = typeid_cast(nested.get())) - subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1); - else - subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); - - return std::make_shared(subcolumn); -} - SerializationPtr DataTypeArray::doGetDefaultSerialization() const { return std::make_shared(nested->getDefaultSerialization()); diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 
c720a15d798..564dbba8503 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -54,23 +54,12 @@ public: return nested->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion(); } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - SerializationPtr doGetDefaultSerialization() const override; const DataTypePtr & getNestedType() const { return nested; } /// 1 for plain array, 2 for array of arrays and so on. size_t getNumberOfDimensions() const; - -private: - ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const; - DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const; - SerializationPtr getSubcolumnSerializationImpl( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const; }; } diff --git a/src/DataTypes/DataTypeDate.cpp b/src/DataTypes/DataTypeDate.cpp index 0df2e329702..ee4b0065e59 100644 --- a/src/DataTypes/DataTypeDate.cpp +++ b/src/DataTypes/DataTypeDate.cpp @@ -1,14 +1,7 @@ -#include -#include - -#include #include #include #include -#include - - namespace DB { diff --git a/src/DataTypes/DataTypeDateTime.cpp b/src/DataTypes/DataTypeDateTime.cpp index 4284c9ae4bd..c7722e1c1d9 100644 --- a/src/DataTypes/DataTypeDateTime.cpp +++ b/src/DataTypes/DataTypeDateTime.cpp @@ -1,28 +1,12 @@ #include #include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include namespace DB { -TimezoneMixin::TimezoneMixin(const String & time_zone_name) - : has_explicit_time_zone(!time_zone_name.empty()), - time_zone(DateLUT::instance(time_zone_name)), - utc_time_zone(DateLUT::instance("UTC")) -{ -} - DataTypeDateTime::DataTypeDateTime(const String & time_zone_name) : TimezoneMixin(time_zone_name) { @@ -52,7 +36,7 @@ bool DataTypeDateTime::equals(const IDataType & rhs) const SerializationPtr DataTypeDateTime::doGetDefaultSerialization() const { - return std::make_shared(time_zone, utc_time_zone); + return std::make_shared(*this); } } diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 926d529a5d8..57052144216 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -2,33 +2,11 @@ #include #include - -class DateLUTImpl; +#include namespace DB { -/** Mixin-class that manages timezone info for timezone-aware DateTime implementations - * - * Must be used as a (second) base for class implementing IDateType-interface. - */ -class TimezoneMixin -{ -public: - explicit TimezoneMixin(const String & time_zone_name = ""); - TimezoneMixin(const TimezoneMixin &) = default; - - const DateLUTImpl & getTimeZone() const { return time_zone; } - bool hasExplicitTimeZone() const { return has_explicit_time_zone; } - -protected: - /// true if time zone name was provided in data type parameters, false if it's using default time zone. - bool has_explicit_time_zone; - - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; -}; - /** DateTime stores time as unix timestamp. * The value itself is independent of time zone. 
* diff --git a/src/DataTypes/DataTypeDateTime64.cpp b/src/DataTypes/DataTypeDateTime64.cpp index bde7bebf455..4fa1569f0e8 100644 --- a/src/DataTypes/DataTypeDateTime64.cpp +++ b/src/DataTypes/DataTypeDateTime64.cpp @@ -1,19 +1,7 @@ #include #include - -#include -#include -#include -#include -#include -#include #include -#include #include -#include -#include -#include - #include #include @@ -65,7 +53,7 @@ bool DataTypeDateTime64::equals(const IDataType & rhs) const SerializationPtr DataTypeDateTime64::doGetDefaultSerialization() const { - return std::make_shared(time_zone, utc_time_zone, scale); + return std::make_shared(scale, *this); } } diff --git a/src/DataTypes/DataTypeDecimalBase.cpp b/src/DataTypes/DataTypeDecimalBase.cpp index f4c28088c48..62218694924 100644 --- a/src/DataTypes/DataTypeDecimalBase.cpp +++ b/src/DataTypes/DataTypeDecimalBase.cpp @@ -1,15 +1,5 @@ #include - -#include -#include -#include -#include -#include -#include #include -#include -#include - #include namespace DB diff --git a/src/DataTypes/DataTypeEnum.cpp b/src/DataTypes/DataTypeEnum.cpp index c86dd9d0b33..b659d92e3f4 100644 --- a/src/DataTypes/DataTypeEnum.cpp +++ b/src/DataTypes/DataTypeEnum.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/src/DataTypes/DataTypeFixedString.cpp b/src/DataTypes/DataTypeFixedString.cpp index a40592ba023..48034a31707 100644 --- a/src/DataTypes/DataTypeFixedString.cpp +++ b/src/DataTypes/DataTypeFixedString.cpp @@ -1,22 +1,12 @@ #include -#include -#include #include #include #include -#include -#include -#include -#include - #include #include -#include -#include - namespace DB { diff --git a/src/DataTypes/DataTypeMap.cpp b/src/DataTypes/DataTypeMap.cpp index 5acf498c9fc..41de17982aa 100644 --- a/src/DataTypes/DataTypeMap.cpp +++ b/src/DataTypes/DataTypeMap.cpp @@ -1,9 +1,7 @@ #include #include #include -#include #include -#include #include #include #include @@ -11,14 +9,7 @@ #include #include #include -#include -#include -#include -#include -#include -#include #include -#include #include @@ -84,27 +75,6 @@ std::string DataTypeMap::doGetName() const return s.str(); } -static const IColumn & extractNestedColumn(const IColumn & column) -{ - return assert_cast(column).getNestedColumn(); -} - -DataTypePtr DataTypeMap::tryGetSubcolumnType(const String & subcolumn_name) const -{ - return nested->tryGetSubcolumnType(subcolumn_name); -} - -ColumnPtr DataTypeMap::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column)); -} - -SerializationPtr DataTypeMap::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); -} - MutableColumnPtr DataTypeMap::createColumn() const { return ColumnMap::create(nested->createColumn()); diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 09b8448885a..04377f85cfb 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -32,11 +32,6 @@ public: bool canBeInsideNullable() const override { return false; } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - 
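A brief sketch of the new construction path suggested by the DateTime changes above: the data types keep deriving from TimezoneMixin, so they can pass themselves to their serializations instead of a pair of DateLUTImpl references. The constructor signature used here is the one introduced for SerializationDateTime64 later in this diff; treat the snippet as an illustration, not the canonical call site.

#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/Serializations/SerializationDateTime64.h>

namespace DB
{

SerializationPtr makeDateTime64Serialization()
{
    /// DateTime64(3, 'UTC'); the data type itself is a TimezoneMixin.
    DataTypeDateTime64 type(/*scale=*/3, "UTC");

    /// Roughly what DataTypeDateTime64::doGetDefaultSerialization() now does with (scale, *this).
    return std::make_shared<SerializationDateTime64>(type.getScale(), type);
}

}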
MutableColumnPtr createColumn() const override; Field getDefault() const override; diff --git a/src/DataTypes/DataTypeNested.cpp b/src/DataTypes/DataTypeNested.cpp index eba1bba5dfe..fe7cd515c81 100644 --- a/src/DataTypes/DataTypeNested.cpp +++ b/src/DataTypes/DataTypeNested.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/DataTypes/DataTypeNothing.cpp b/src/DataTypes/DataTypeNothing.cpp index 388a65754b5..c2b552035a0 100644 --- a/src/DataTypes/DataTypeNothing.cpp +++ b/src/DataTypes/DataTypeNothing.cpp @@ -1,10 +1,7 @@ -#include #include #include #include #include -#include -#include namespace DB diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 3820a320c6d..b354b1278be 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -1,17 +1,9 @@ #include #include -#include #include #include -#include #include #include -#include -#include -#include -#include -#include -#include #include #include #include @@ -63,32 +55,6 @@ bool DataTypeNullable::equals(const IDataType & rhs) const return rhs.isNullable() && nested_data_type->equals(*static_cast(rhs).nested_data_type); } -DataTypePtr DataTypeNullable::tryGetSubcolumnType(const String & subcolumn_name) const -{ - if (subcolumn_name == "null") - return std::make_shared(); - - return nested_data_type->tryGetSubcolumnType(subcolumn_name); -} - -ColumnPtr DataTypeNullable::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - const auto & column_nullable = assert_cast(column); - if (subcolumn_name == "null") - return column_nullable.getNullMapColumnPtr(); - - return nested_data_type->getSubcolumn(subcolumn_name, column_nullable.getNestedColumn()); -} - -SerializationPtr DataTypeNullable::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - if (subcolumn_name == "null") - return std::make_shared(base_serialization_getter(DataTypeUInt8()), subcolumn_name, false); - - return nested_data_type->getSubcolumnSerialization(subcolumn_name, base_serialization_getter); -} - SerializationPtr DataTypeNullable::doGetDefaultSerialization() const { return std::make_shared(nested_data_type->getDefaultSerialization()); diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index 1557179d072..1a54d0de611 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -41,11 +41,6 @@ public: bool onlyNull() const override; bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); } - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - const DataTypePtr & getNestedType() const { return nested_data_type; } private: SerializationPtr doGetDefaultSerialization() const override; diff --git a/src/DataTypes/DataTypeNumberBase.cpp b/src/DataTypes/DataTypeNumberBase.cpp index a73d591654a..f668a4c522e 100644 --- a/src/DataTypes/DataTypeNumberBase.cpp +++ b/src/DataTypes/DataTypeNumberBase.cpp @@ -1,13 +1,6 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include namespace DB diff --git a/src/DataTypes/DataTypeString.cpp 
b/src/DataTypes/DataTypeString.cpp index 41ae578a70f..84610557d21 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -1,14 +1,6 @@ -#include - #include -#include - -#include -#include - #include -#include #include #include #include @@ -16,15 +8,6 @@ #include #include -#include -#include -#include - -#ifdef __SSE2__ - #include -#endif - - namespace DB { diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index 0fc38e9c6f0..fd674505bc0 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -1,7 +1,5 @@ #pragma once -#include - #include diff --git a/src/DataTypes/DataTypeTuple.cpp b/src/DataTypes/DataTypeTuple.cpp index aa0a57c636e..4e1a5a05d45 100644 --- a/src/DataTypes/DataTypeTuple.cpp +++ b/src/DataTypes/DataTypeTuple.cpp @@ -3,20 +3,17 @@ #include #include #include -#include #include #include #include #include -#include +#include #include #include #include -#include #include #include #include -#include #include #include @@ -30,7 +27,6 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int EMPTY_DATA_PASSED; extern const int NOT_FOUND_COLUMN_IN_BLOCK; - extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH; } @@ -107,11 +103,6 @@ static inline IColumn & extractElementColumn(IColumn & column, size_t idx) return assert_cast(column).getColumn(idx); } -static inline const IColumn & extractElementColumn(const IColumn & column, size_t idx) -{ - return assert_cast(column).getColumn(idx); -} - template static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl) { @@ -234,74 +225,6 @@ size_t DataTypeTuple::getSizeOfValueInMemory() const return res; } -template -auto DataTypeTuple::getSubcolumnEntity(const String & subcolumn_name, - const OnSuccess & on_success, const OnContinue & on_continue) const -{ - using ReturnType = decltype(on_success(0)); - for (size_t i = 0; i < names.size(); ++i) - { - if (startsWith(subcolumn_name, names[i])) - { - size_t name_length = names[i].size(); - - if (subcolumn_name.size() == name_length) - return on_success(i); - - if (subcolumn_name[name_length] == '.') - return on_continue(i, subcolumn_name.substr(name_length + 1)); - } - } - - return ReturnType{}; -} - -DataTypePtr DataTypeTuple::tryGetSubcolumnType(const String & subcolumn_name) const -{ - if (subcolumn_name == MAIN_SUBCOLUMN_NAME) - return shared_from_this(); - - auto on_success = [&](size_t pos) { return elems[pos]; }; - auto on_continue = [&](size_t pos, const String & next_subcolumn) { return elems[pos]->tryGetSubcolumnType(next_subcolumn); }; - - return getSubcolumnEntity(subcolumn_name, on_success, on_continue); -} - -ColumnPtr DataTypeTuple::getSubcolumn(const String & subcolumn_name, const IColumn & column) const -{ - auto on_success = [&](size_t pos) { return extractElementColumn(column, pos).getPtr(); }; - auto on_continue = [&](size_t pos, const String & next_subcolumn) - { - return elems[pos]->getSubcolumn(next_subcolumn, extractElementColumn(column, pos)); - }; - - if (auto subcolumn = getSubcolumnEntity(subcolumn_name, on_success, on_continue)) - return subcolumn; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - -SerializationPtr DataTypeTuple::getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const -{ - auto on_success = [&](size_t pos) - 
{ - return std::make_shared(base_serialization_getter(*elems[pos]), names[pos]); - }; - - auto on_continue = [&](size_t pos, const String & next_subcolumn) - { - auto next_serialization = elems[pos]->getSubcolumnSerialization(next_subcolumn, base_serialization_getter); - return std::make_shared(next_serialization, names[pos]); - }; - - if (auto serialization = getSubcolumnEntity(subcolumn_name, on_success, on_continue)) - return serialization; - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - - SerializationPtr DataTypeTuple::doGetDefaultSerialization() const { SerializationTuple::ElementSerializations serializations(elems.size()); @@ -310,7 +233,7 @@ SerializationPtr DataTypeTuple::doGetDefaultSerialization() const { String elem_name = use_explicit_names ? names[i] : toString(i + 1); auto serialization = elems[i]->getDefaultSerialization(); - serializations[i] = std::make_shared(serialization, elem_name); + serializations[i] = std::make_shared(serialization, elem_name); } return std::make_shared(std::move(serializations), use_explicit_names); @@ -325,7 +248,7 @@ SerializationPtr DataTypeTuple::getSerialization(const String & column_name, con String elem_name = use_explicit_names ? names[i] : toString(i + 1); auto subcolumn_name = Nested::concatenateName(column_name, elem_name); auto serializaion = elems[i]->getSerialization(subcolumn_name, callback); - serializations[i] = std::make_shared(serializaion, elem_name); + serializations[i] = std::make_shared(serializaion, elem_name); } return std::make_shared(std::move(serializations), use_explicit_names); diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index e572b23f987..8dae8b7765b 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -52,16 +52,11 @@ public: size_t getMaximumSizeOfValueInMemory() const override; size_t getSizeOfValueInMemory() const override; - DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override; - ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override; - SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const override; - SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override; - SerializationPtr doGetDefaultSerialization() const override; + const DataTypePtr & getElement(size_t i) const { return elems[i]; } const DataTypes & getElements() const { return elems; } const Strings & getElementNames() const { return names; } @@ -69,11 +64,6 @@ public: bool haveExplicitNames() const { return have_explicit_names; } bool serializeNames() const { return serialize_names; } - -private: - template - auto getSubcolumnEntity(const String & subcolumn_name, - const OnSuccess & on_success, const OnContinue & on_continue) const; }; } diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 9f7320197c8..f0fbd6cab26 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -1,16 +1,13 @@ #include #include -#include #include #include #include #include #include #include -#include #include -#include #include diff --git a/src/DataTypes/EnumValues.cpp b/src/DataTypes/EnumValues.cpp index 6df899ba9a2..ab5ea0ca249 100644 --- a/src/DataTypes/EnumValues.cpp +++ b/src/DataTypes/EnumValues.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -82,6 
+83,24 @@ Names EnumValues::getAllRegisteredNames() const return result; } +template +std::unordered_set EnumValues::getSetOfAllNames(bool to_lower) const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(to_lower ? boost::algorithm::to_lower_copy(value.first) : value.first); + return result; +} + +template +std::unordered_set EnumValues::getSetOfAllValues() const +{ + std::unordered_set result; + for (const auto & value : values) + result.insert(value.second); + return result; +} + template class EnumValues; template class EnumValues; diff --git a/src/DataTypes/EnumValues.h b/src/DataTypes/EnumValues.h index 1e5e4f55ea7..17c292c5551 100644 --- a/src/DataTypes/EnumValues.h +++ b/src/DataTypes/EnumValues.h @@ -80,6 +80,10 @@ public: } Names getAllRegisteredNames() const override; + + std::unordered_set getSetOfAllNames(bool to_lower) const; + + std::unordered_set getSetOfAllValues() const; }; } diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 93bb1757a4d..669876c792d 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -11,7 +10,6 @@ #include #include #include -#include namespace DB @@ -65,12 +63,40 @@ size_t IDataType::getSizeOfValueInMemory() const throw Exception("Value of type " + getName() + " in memory is not of fixed size.", ErrorCodes::LOGICAL_ERROR); } +void IDataType::forEachSubcolumn( + const SubcolumnCallback & callback, + const SerializationPtr & serialization, + const DataTypePtr & type, + const ColumnPtr & column) +{ + ISerialization::StreamCallback callback_with_data = [&](const auto & subpath) + { + for (size_t i = 0; i < subpath.size(); ++i) + { + if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, i + 1)) + { + auto name = ISerialization::getSubcolumnNameForStream(subpath, i + 1); + auto data = ISerialization::createFromPath(subpath, i); + callback(subpath, name, data); + } + subpath[i].visited = true; + } + }; + + ISerialization::SubstreamPath path; + serialization->enumerateStreams(path, callback_with_data, type, column); +} + DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const { - if (subcolumn_name == MAIN_SUBCOLUMN_NAME) - return shared_from_this(); + DataTypePtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + { + if (name == subcolumn_name) + res = data.type; + }, getDefaultSerialization(), getPtr(), nullptr); - return nullptr; + return res; } DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const @@ -82,42 +108,43 @@ DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } -ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &) const +SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const { + SerializationPtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) + { + if (name == subcolumn_name) + res = data.serialization; + }, serialization, nullptr, nullptr); + + if (res) + return res; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } -void IDataType::forEachSubcolumn(const SubcolumnCallback & callback) const +ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & 
column) const { - NameSet set; - getDefaultSerialization()->enumerateStreams([&, this](const ISerialization::SubstreamPath & substream_path) + ColumnPtr res; + forEachSubcolumn([&](const auto &, const auto & name, const auto & data) { - ISerialization::SubstreamPath new_path; - /// Iterate over path to try to get intermediate subcolumns for complex nested types. - for (const auto & elem : substream_path) - { - new_path.push_back(elem); - auto name = ISerialization::getSubcolumnNameForStream(new_path); - auto type = tryGetSubcolumnType(name); + if (name == subcolumn_name) + res = data.column; + }, getDefaultSerialization(), nullptr, column); - /// Subcolumn names may repeat among several substream paths. - if (!name.empty() && type && !set.count(name)) - { - callback(name, type, substream_path); - set.insert(name); - } - } - }); + if (res) + return res; + + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } Names IDataType::getSubcolumnNames() const { Names res; - forEachSubcolumn([&](const auto & name, const auto &, const auto &) + forEachSubcolumn([&](const auto &, const auto & name, const auto &) { res.push_back(name); - }); - + }, getDefaultSerialization(), nullptr, nullptr); return res; } @@ -144,24 +171,14 @@ SerializationPtr IDataType::getDefaultSerialization() const return doGetDefaultSerialization(); } -SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const BaseSerializationGetter &) const -{ - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); -} - // static SerializationPtr IDataType::getSerialization(const NameAndTypePair & column, const IDataType::StreamExistenceCallback & callback) { if (column.isSubcolumn()) { - /// Wrap to custom serialization deepest subcolumn, which is represented in non-complex type. 
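A small usage sketch for the EnumValues helpers added earlier in this hunk; the particular enum entries are made up for illustration.

#include <DataTypes/EnumValues.h>

namespace DB
{

void exampleEnumSets()
{
    /// Values as they would appear in Enum8('Hello' = 1, 'World' = 2).
    EnumValues<Int8> enum_values({{"Hello", 1}, {"World", 2}});

    /// {"hello", "world"} -- lower-cased names, e.g. for case-insensitive matching.
    auto names = enum_values.getSetOfAllNames(/*to_lower=*/true);

    /// {1, 2} -- the set of all numeric enum values.
    auto values = enum_values.getSetOfAllValues();
}

}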
- auto base_serialization_getter = [&](const IDataType & subcolumn_type) - { - return subcolumn_type.getSerialization(column.name, callback); - }; - const auto & type_in_storage = column.getTypeInStorage(); - return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), base_serialization_getter); + auto default_serialization = type_in_storage->getDefaultSerialization(); + return type_in_storage->getSubcolumnSerialization(column.getSubcolumnName(), default_serialization); } return column.type->getSerialization(column.name, callback); @@ -172,21 +189,4 @@ SerializationPtr IDataType::getSerialization(const String &, const StreamExisten return getDefaultSerialization(); } -DataTypePtr IDataType::getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const -{ - auto type = tryGetSubcolumnType(ISerialization::getSubcolumnNameForStream(substream_path)); - if (type) - return type->getSubcolumnType(MAIN_SUBCOLUMN_NAME); - - return getSubcolumnType(MAIN_SUBCOLUMN_NAME); -} - -void IDataType::enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath & path) const -{ - serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) - { - callback(substream_path, *getTypeForSubstream(substream_path)); - }, path); -} - } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 360bf9f16e0..a53fdac797f 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -70,19 +70,31 @@ public: return doGetName(); } + DataTypePtr getPtr() const { return shared_from_this(); } + /// Name of data type family (example: FixedString, Array). virtual const char * getFamilyName() const = 0; /// Data type id. It's used for runtime type checks. virtual TypeIndex getTypeId() const = 0; - static constexpr auto MAIN_SUBCOLUMN_NAME = "__main"; - virtual DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const; + DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const; DataTypePtr getSubcolumnType(const String & subcolumn_name) const; - virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const; - using SubcolumnCallback = std::function; - void forEachSubcolumn(const SubcolumnCallback & callback) const; + SerializationPtr getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const; + ColumnPtr getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const; + + using SubcolumnCallback = std::function; + + static void forEachSubcolumn( + const SubcolumnCallback & callback, + const SerializationPtr & serialization, + const DataTypePtr & type, + const ColumnPtr & column); + Names getSubcolumnNames() const; /// Returns default serialization of data type. @@ -93,7 +105,6 @@ public: /// one of serialization types, that serialization will be chosen for reading. /// If callback always returned false, the default serialization will be chosen. using StreamExistenceCallback = std::function; - using BaseSerializationGetter = std::function; /// Chooses serialization for reading of one column or subcolumns by /// checking existence of substreams using callback. @@ -103,22 +114,10 @@ public: virtual SerializationPtr getSerialization(const String & column_name, const StreamExistenceCallback & callback) const; - /// Returns serialization wrapper for reading one particular subcolumn of data type. 
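To make the reworked subcolumn API above concrete, a sketch of how it can be called; the expected results follow from the serialization changes later in this diff and should be read as an illustration, not a specification.

#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>

namespace DB
{

void exampleSubcolumnApi()
{
    DataTypePtr type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>());

    /// Enumerate subcolumns. For Nullable(UInt64) the callback should fire once.
    IDataType::forEachSubcolumn(
        [](const auto & /*path*/, const auto & name, const auto & data)
        {
            /// name is expected to be "null"; data carries the subcolumn type (UInt8),
            /// column and serialization for it.
        },
        type->getDefaultSerialization(), type, nullptr);

    /// Point lookups now reuse the same enumeration internally.
    DataTypePtr null_map_type = type->tryGetSubcolumnType("null");

    /// getSubcolumn takes a ColumnPtr after this change and returns the null-map column.
    ColumnPtr column = type->createColumn();
    ColumnPtr null_map = type->getSubcolumn("null", column);
}

}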
- virtual SerializationPtr getSubcolumnSerialization( - const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const; - - using StreamCallbackWithType = std::function; - - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath & path) const; - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback, ISerialization::SubstreamPath && path) const { enumerateStreams(serialization, callback, path); } - void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback) const { enumerateStreams(serialization, callback, {}); } - protected: virtual String doGetName() const { return getFamilyName(); } virtual SerializationPtr doGetDefaultSerialization() const = 0; - DataTypePtr getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const; - public: /** Create empty column for corresponding type. */ diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 7077c5bfa14..5c0274b0e35 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -17,30 +18,11 @@ namespace ErrorCodes String ISerialization::Substream::toString() const { - switch (type) - { - case ArrayElements: - return "ArrayElements"; - case ArraySizes: - return "ArraySizes"; - case NullableElements: - return "NullableElements"; - case NullMap: - return "NullMap"; - case TupleElement: - return "TupleElement(" + tuple_element_name + ", " - + std::to_string(escape_tuple_delimiter) + ")"; - case DictionaryKeys: - return "DictionaryKeys"; - case DictionaryIndexes: - return "DictionaryIndexes"; - case SparseElements: - return "SparseElements"; - case SparseOffsets: - return "SparseOffsets"; - } + if (type == TupleElement) + return fmt::format("TupleElement({}, escape_tuple_delimiter={})", + tuple_element_name, escape_tuple_delimiter ? 
"true" : "false"); - __builtin_unreachable(); + return String(magic_enum::enum_name(type)); } String ISerialization::SubstreamPath::toString() const @@ -57,9 +39,21 @@ String ISerialization::SubstreamPath::toString() const return wb.str(); } +void ISerialization::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + path.push_back(Substream::Regular); + path.back().data = {type, column, getPtr(), nullptr}; + callback(path); + path.pop_back(); +} + void ISerialization::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const { - callback(path); + enumerateStreams(path, callback, nullptr, nullptr); } void ISerialization::serializeBinaryBulk(const IColumn & column, WriteBuffer &, size_t, size_t) const @@ -104,40 +98,48 @@ void ISerialization::deserializeBinaryBulkWithMultipleStreams( } } -static String getNameForSubstreamPath( +namespace +{ + +using SubstreamIterator = ISerialization::SubstreamPath::const_iterator; + +String getNameForSubstreamPath( String stream_name, - const ISerialization::SubstreamPath & path, + SubstreamIterator begin, + SubstreamIterator end, bool escape_tuple_delimiter) { using Substream = ISerialization::Substream; size_t array_level = 0; - for (const auto & elem : path) + for (auto it = begin; it != end; ++it) { - if (elem.type == Substream::NullMap) + if (it->type == Substream::NullMap) stream_name += ".null"; - else if (elem.type == Substream::ArraySizes) + else if (it->type == Substream::ArraySizes) stream_name += ".size" + toString(array_level); - else if (elem.type == Substream::ArrayElements) + else if (it->type == Substream::ArrayElements) ++array_level; - else if (elem.type == Substream::DictionaryKeys) + else if (it->type == Substream::DictionaryKeys) stream_name += ".dict"; - else if (elem.type == Substream::SparseOffsets) + else if (it->type == Substream::SparseOffsets) stream_name += ".sparse.idx"; - else if (elem.type == Substream::TupleElement) + else if (it->type == Substream::TupleElement) { /// For compatibility reasons, we use %2E (escaped dot) instead of dot. /// Because nested data may be represented not by Array of Tuple, /// but by separate Array columns with names in a form of a.b, /// and name is encoded as a whole. - stream_name += (escape_tuple_delimiter && elem.escape_tuple_delimiter ? - escapeForFileName(".") : ".") + escapeForFileName(elem.tuple_element_name); + stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ? 
+ escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name); } } return stream_name; } +} + String ISerialization::getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path) { return getFileNameForStream(column.getNameInStorage(), path); @@ -152,12 +154,17 @@ String ISerialization::getFileNameForStream(const String & name_in_storage, cons else stream_name = escapeForFileName(name_in_storage); - return getNameForSubstreamPath(std::move(stream_name), path, true); + return getNameForSubstreamPath(std::move(stream_name), path.begin(), path.end(), true); } String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path) { - auto subcolumn_name = getNameForSubstreamPath("", path, false); + return getSubcolumnNameForStream(path, path.size()); +} + +String ISerialization::getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len) +{ + auto subcolumn_name = getNameForSubstreamPath("", path.begin(), path.begin() + prefix_len, false); if (!subcolumn_name.empty()) subcolumn_name = subcolumn_name.substr(1); // It starts with a dot. @@ -195,4 +202,44 @@ bool ISerialization::isSpecialCompressionAllowed(const SubstreamPath & path) return true; } +size_t ISerialization::getArrayLevel(const SubstreamPath & path) +{ + size_t level = 0; + for (const auto & elem : path) + level += elem.type == Substream::ArrayElements; + return level; } + +bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len) +{ + if (prefix_len == 0 || prefix_len > path.size()) + return false; + + size_t last_elem = prefix_len - 1; + return path[last_elem].type == Substream::NullMap + || path[last_elem].type == Substream::TupleElement + || path[last_elem].type == Substream::ArraySizes; +} + +ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len) +{ + assert(prefix_len < path.size()); + + SubstreamData res = path[prefix_len].data; + res.creator.reset(); + for (ssize_t i = static_cast(prefix_len) - 1; i >= 0; --i) + { + const auto & creator = path[i].data.creator; + if (creator) + { + res.type = res.type ? creator->create(res.type) : res.type; + res.serialization = res.serialization ? creator->create(res.serialization) : res.serialization; + res.column = res.column ? creator->create(res.column) : res.column; + } + } + + return res; +} + +} + diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index f1d82a2000a..7562cfcb9a0 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -2,35 +2,39 @@ #include #include +#include +#include #include #include namespace DB { -class IDataType; - class ReadBuffer; class WriteBuffer; class ProtobufReader; class ProtobufWriter; -class IColumn; -using ColumnPtr = COW::Ptr; -using MutableColumnPtr = COW::MutablePtr; +class IDataType; +using DataTypePtr = std::shared_ptr; + +class ISerialization; +using SerializationPtr = std::shared_ptr; class Field; struct FormatSettings; struct NameAndTypePair; -class ISerialization +class ISerialization : private boost::noncopyable, public std::enable_shared_from_this { public: ISerialization() = default; virtual ~ISerialization() = default; + SerializationPtr getPtr() const { return shared_from_this(); } + /** Binary serialization for range of values in column - for writing to disk/network, etc. * * Some data types are represented in multiple streams while being serialized. 
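A brief sketch of how the (now iterator-based) naming helpers above behave; the column name "arr" is illustrative.

#include <DataTypes/Serializations/ISerialization.h>

namespace DB
{

void exampleStreamNames()
{
    ISerialization::SubstreamPath path;
    path.push_back(ISerialization::Substream::ArraySizes);

    /// On-disk stream name: escaped column name plus ".size<level>".
    String file_name = ISerialization::getFileNameForStream("arr", path);   /// "arr.size0"

    /// Subcolumn name is the same suffix without the leading column name and dot.
    String subcolumn = ISerialization::getSubcolumnNameForStream(path);     /// "size0"

    /// Helpers used by IDataType::forEachSubcolumn when cutting prefixes of a path.
    bool has_subcolumn = ISerialization::hasSubcolumnForPath(path, 1);      /// true: ArraySizes
    size_t level = ISerialization::getArrayLevel(path);                     /// 0: no ArrayElements in the path
}

}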
@@ -54,6 +58,24 @@ public: * Default implementations of ...WithMultipleStreams methods will call serializeBinaryBulk, deserializeBinaryBulk for single stream. */ + struct ISubcolumnCreator + { + virtual DataTypePtr create(const DataTypePtr & prev) const = 0; + virtual SerializationPtr create(const SerializationPtr & prev) const = 0; + virtual ColumnPtr create(const ColumnPtr & prev) const = 0; + virtual ~ISubcolumnCreator() = default; + }; + + using SubcolumnCreatorPtr = std::shared_ptr; + + struct SubstreamData + { + DataTypePtr type; + ColumnPtr column; + SerializationPtr serialization; + SubcolumnCreatorPtr creator; + }; + struct Substream { enum Type @@ -71,7 +93,10 @@ public: SparseElements, SparseOffsets, + + Regular, }; + Type type; /// Index of tuple element, starting at 1 or name. @@ -80,6 +105,12 @@ public: /// Do we need to escape a dot in filenames for tuple elements. bool escape_tuple_delimiter = true; + /// Data for current substream. + SubstreamData data; + + /// Flag, that may help to traverse substream paths. + mutable bool visited = false; + Substream(Type type_) : type(type_) {} String toString() const; @@ -96,7 +127,13 @@ public: using StreamCallback = std::function; - virtual void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const; + virtual void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const; + + void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const; void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); } void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); } @@ -249,11 +286,16 @@ public: static String getFileNameForStream(const NameAndTypePair & column, const SubstreamPath & path); static String getFileNameForStream(const String & name_in_storage, const SubstreamPath & path); static String getSubcolumnNameForStream(const SubstreamPath & path); + static String getSubcolumnNameForStream(const SubstreamPath & path, size_t prefix_len); static void addToSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path, ColumnPtr column); static ColumnPtr getFromSubstreamsCache(SubstreamsCache * cache, const SubstreamPath & path); static bool isSpecialCompressionAllowed(const SubstreamPath & path); + static size_t getArrayLevel(const SubstreamPath & path); + + static bool hasSubcolumnForPath(const SubstreamPath & path, size_t prefix_len); + static SubstreamData createFromPath(const SubstreamPath & path, size_t prefix_len); }; using SerializationPtr = std::shared_ptr; diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 925ba0b9e74..2339f23853e 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -1,7 +1,6 @@ #include #include -#include #include diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 70a72c51e78..4ccee54c294 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include #include @@ -177,16 +180,53 @@ ColumnPtr arrayOffsetsToSizes(const IColumn & column) return column_sizes; } - -void 
SerializationArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +DataTypePtr SerializationArray::SubcolumnCreator::create(const DataTypePtr & prev) const { - path.push_back(Substream::ArraySizes); - callback(path); - path.back() = Substream::ArrayElements; - nested->enumerateStreams(callback, path); - path.pop_back(); + return std::make_shared(prev); } +SerializationPtr SerializationArray::SubcolumnCreator::create(const SerializationPtr & prev) const +{ + return std::make_shared(prev); +} + +ColumnPtr SerializationArray::SubcolumnCreator::create(const ColumnPtr & prev) const +{ + return ColumnArray::create(prev, offsets); +} + +void SerializationArray::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + const auto * type_array = type ? &assert_cast(*type) : nullptr; + const auto * column_array = column ? &assert_cast(*column) : nullptr; + auto offsets_column = column_array ? column_array->getOffsetsPtr() : nullptr; + + path.push_back(Substream::ArraySizes); + path.back().data = + { + type ? std::make_shared() : nullptr, + offsets_column ? arrayOffsetsToSizes(*offsets_column) : nullptr, + std::make_shared( + std::make_shared>(), + "size" + std::to_string(getArrayLevel(path)), false), + nullptr, + }; + + callback(path); + + path.back() = Substream::ArrayElements; + path.back().data = {type, column, getPtr(), std::make_shared(offsets_column)}; + + auto next_type = type_array ? type_array->getNestedType() : nullptr; + auto next_column = column_array ? column_array->getDataPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); + path.pop_back(); +} void SerializationArray::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index 71037090a48..83045d4c033 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -35,7 +35,11 @@ public: * This is necessary, because when implementing nested structures, several arrays can have common sizes. 
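The effect of the SubcolumnCreator introduced above, sketched on a concrete type; the stated result types are what the creator chain in createFromPath is expected to produce, shown here for illustration only.

#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>

namespace DB
{

void exampleArraySubcolumns()
{
    auto type = std::make_shared<DataTypeArray>(
        std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()));

    /// "size0" comes from the ArraySizes substream and is a plain UInt64 column.
    DataTypePtr sizes = type->tryGetSubcolumnType("size0");

    /// "null" is the null map of the nested Nullable; the SubcolumnCreator wraps it
    /// back into an Array, so the expected subcolumn type is Array(UInt8).
    DataTypePtr null_map = type->tryGetSubcolumnType("null");
}

}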
*/ - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -62,6 +66,18 @@ public: DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state, SubstreamsCache * cache) const override; + +private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr offsets; + + SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} + + DataTypePtr create(const DataTypePtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + }; }; ColumnPtr arrayOffsetsToSizes(const IColumn & column); diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index e43edbac592..ff1bb9b2c30 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -8,6 +8,7 @@ namespace DB { + void SerializationDate32::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { writeDateText(ExtendedDayNum(assert_cast(column).getData()[row_num]), ostr); diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index b93c69203cb..0e3b60f3772 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -32,9 +32,8 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti } -SerializationDateTime::SerializationDateTime( - const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_) - : time_zone(time_zone_), utc_time_zone(utc_time_zone_) +SerializationDateTime::SerializationDateTime(const TimezoneMixin & time_zone_) + : TimezoneMixin(time_zone_) { } diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 8cf57ddef89..75334592422 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -1,20 +1,17 @@ #pragma once #include +#include class DateLUTImpl; namespace DB { -class SerializationDateTime final : public SerializationNumber +class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { -private: - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; - public: - SerializationDateTime(const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_); + SerializationDateTime(const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index 8d446d3b9ad..613422f21fb 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -17,9 +17,9 @@ namespace DB { SerializationDateTime64::SerializationDateTime64( - const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_, UInt32 scale_) + UInt32 scale_, const 
TimezoneMixin & time_zone_) : SerializationDecimalBase(DecimalUtils::max_precision, scale_) - , time_zone(time_zone_), utc_time_zone(utc_time_zone_) + , TimezoneMixin(time_zone_) { } diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index c36649daef1..1679170b36f 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -1,20 +1,17 @@ #pragma once #include +#include class DateLUTImpl; namespace DB { -class SerializationDateTime64 final : public SerializationDecimalBase +class SerializationDateTime64 final : public SerializationDecimalBase, public TimezoneMixin { -private: - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; - public: - SerializationDateTime64(const DateLUTImpl & time_zone_, const DateLUTImpl & utc_time_zone_, UInt32 scale_); + SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.cpp b/src/DataTypes/Serializations/SerializationLowCardinality.cpp index e9bb62f74c5..7a86d5413b2 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.cpp +++ b/src/DataTypes/Serializations/SerializationLowCardinality.cpp @@ -40,11 +40,27 @@ SerializationLowCardinality::SerializationLowCardinality(const DataTypePtr & dic { } -void SerializationLowCardinality::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationLowCardinality::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { + const auto * column_lc = column ? &getColumnLowCardinality(*column) : nullptr; + + SubstreamData data; + data.type = type ? dictionary_type : nullptr; + data.column = column_lc ? 
column_lc->getDictionary().getNestedColumn() : nullptr; + data.serialization = dict_inner_serialization; + path.push_back(Substream::DictionaryKeys); - dict_inner_serialization->enumerateStreams(callback, path); + path.back().data = data; + + dict_inner_serialization->enumerateStreams(path, callback, data.type, data.column); + path.back() = Substream::DictionaryIndexes; + path.back().data = {type, column, getPtr(), nullptr}; + callback(path); path.pop_back(); } diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index e9ca0349e38..f82b35a52d5 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -17,7 +17,11 @@ private: public: SerializationLowCardinality(const DataTypePtr & dictionary_type); - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index e8446781f10..2e436070e1c 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -250,10 +251,16 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c deserializeText(column, rb, settings); } - -void SerializationMap::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationMap::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - nested->enumerateStreams(callback, path); + auto next_type = type ? assert_cast(*type).getNestedType() : nullptr; + auto next_column = column ? 
assert_cast(*column).getNestedColumnPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); } void SerializationMap::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index 6523d5388d0..b6a003139ec 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -31,7 +31,11 @@ public: void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationTupleElement.cpp b/src/DataTypes/Serializations/SerializationNamed.cpp similarity index 69% rename from src/DataTypes/Serializations/SerializationTupleElement.cpp rename to src/DataTypes/Serializations/SerializationNamed.cpp index 4b50810fcd6..4ef4d4527f8 100644 --- a/src/DataTypes/Serializations/SerializationTupleElement.cpp +++ b/src/DataTypes/Serializations/SerializationNamed.cpp @@ -1,18 +1,21 @@ -#include +#include namespace DB { -void SerializationTupleElement::enumerateStreams( +void SerializationNamed::enumerateStreams( + SubstreamPath & path, const StreamCallback & callback, - SubstreamPath & path) const + DataTypePtr type, + ColumnPtr column) const { addToPath(path); - nested_serialization->enumerateStreams(callback, path); + path.back().data = {type, column, getPtr(), std::make_shared(name, escape_delimiter)}; + nested_serialization->enumerateStreams(path, callback, type, column); path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkStatePrefix( +void SerializationNamed::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { @@ -21,7 +24,7 @@ void SerializationTupleElement::serializeBinaryBulkStatePrefix( settings.path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkStateSuffix( +void SerializationNamed::serializeBinaryBulkStateSuffix( SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const { @@ -30,7 +33,7 @@ void SerializationTupleElement::serializeBinaryBulkStateSuffix( settings.path.pop_back(); } -void SerializationTupleElement::deserializeBinaryBulkStatePrefix( +void SerializationNamed::deserializeBinaryBulkStatePrefix( DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const { @@ -39,7 +42,7 @@ void SerializationTupleElement::deserializeBinaryBulkStatePrefix( settings.path.pop_back(); } -void SerializationTupleElement::serializeBinaryBulkWithMultipleStreams( +void SerializationNamed::serializeBinaryBulkWithMultipleStreams( const IColumn & column, size_t offset, size_t limit, @@ -51,7 +54,7 @@ void SerializationTupleElement::serializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } -void SerializationTupleElement::deserializeBinaryBulkWithMultipleStreams( +void SerializationNamed::deserializeBinaryBulkWithMultipleStreams( ColumnPtr & column, size_t limit, DeserializeBinaryBulkSettings & settings, @@ -63,7 +66,7 @@ void 
SerializationTupleElement::deserializeBinaryBulkWithMultipleStreams( settings.path.pop_back(); } -void SerializationTupleElement::addToPath(SubstreamPath & path) const +void SerializationNamed::addToPath(SubstreamPath & path) const { path.push_back(Substream::TupleElement); path.back().tuple_element_name = name; diff --git a/src/DataTypes/Serializations/SerializationTupleElement.h b/src/DataTypes/Serializations/SerializationNamed.h similarity index 59% rename from src/DataTypes/Serializations/SerializationTupleElement.h rename to src/DataTypes/Serializations/SerializationNamed.h index b85014c9e64..20dd15a20ba 100644 --- a/src/DataTypes/Serializations/SerializationTupleElement.h +++ b/src/DataTypes/Serializations/SerializationNamed.h @@ -5,14 +5,14 @@ namespace DB { -class SerializationTupleElement final : public SerializationWrapper +class SerializationNamed final : public SerializationWrapper { private: String name; bool escape_delimiter; public: - SerializationTupleElement(const SerializationPtr & nested_, const String & name_, bool escape_delimiter_ = true) + SerializationNamed(const SerializationPtr & nested_, const String & name_, bool escape_delimiter_ = true) : SerializationWrapper(nested_) , name(name_), escape_delimiter(escape_delimiter_) { @@ -21,11 +21,13 @@ public: const String & getElementName() const { return name; } void enumerateStreams( + SubstreamPath & path, const StreamCallback & callback, - SubstreamPath & path) const override; + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( - SerializeBinaryBulkSettings & settings, + SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override; void serializeBinaryBulkStateSuffix( @@ -51,6 +53,22 @@ public: SubstreamsCache * cache) const override; private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const String name; + const bool escape_delimiter; + + SubcolumnCreator(const String & name_, bool escape_delimiter_) + : name(name_), escape_delimiter(escape_delimiter_) {} + + DataTypePtr create(const DataTypePtr & prev) const override { return prev; } + ColumnPtr create(const ColumnPtr & prev) const override { return prev; } + SerializationPtr create(const SerializationPtr & prev) const override + { + return std::make_shared(prev, name, escape_delimiter); + } + }; + void addToPath(SubstreamPath & path) const; }; diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index b607d5871d6..560a4812123 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -1,5 +1,8 @@ #include #include +#include +#include +#include #include #include @@ -20,15 +23,50 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; } -void SerializationNullable::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +DataTypePtr SerializationNullable::SubcolumnCreator::create(const DataTypePtr & prev) const { - path.push_back(Substream::NullMap); - callback(path); - path.back() = Substream::NullableElements; - nested->enumerateStreams(callback, path); - path.pop_back(); + return std::make_shared(prev); } +SerializationPtr SerializationNullable::SubcolumnCreator::create(const SerializationPtr & prev) const +{ + return std::make_shared(prev); +} + +ColumnPtr SerializationNullable::SubcolumnCreator::create(const ColumnPtr & prev) const +{ + return ColumnNullable::create(prev, null_map); +} + +void 
SerializationNullable::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const +{ + const auto * type_nullable = type ? &assert_cast(*type) : nullptr; + const auto * column_nullable = column ? &assert_cast(*column) : nullptr; + + path.push_back(Substream::NullMap); + path.back().data = + { + type_nullable ? std::make_shared() : nullptr, + column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr, + std::make_shared(std::make_shared>(), "null", false), + nullptr, + }; + + callback(path); + + path.back() = Substream::NullableElements; + path.back().data = {type, column, getPtr(), std::make_shared(path.back().data.column)}; + + auto next_type = type_nullable ? type_nullable->getNestedType() : nullptr; + auto next_column = column_nullable ? column_nullable->getNestedColumnPtr() : nullptr; + + nested->enumerateStreams(path, callback, next_type, next_column); + path.pop_back(); +} void SerializationNullable::serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index b0b96c021d3..c39c4dd6573 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -13,7 +13,11 @@ private: public: SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, @@ -80,6 +84,18 @@ public: static ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested); template static ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested); + +private: + struct SubcolumnCreator : public ISubcolumnCreator + { + const ColumnPtr null_map; + + SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} + + DataTypePtr create(const DataTypePtr & prev) const override; + SerializationPtr create(const SerializationPtr & prev) const override; + ColumnPtr create(const ColumnPtr & prev) const override; + }; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 1324c6b2b1a..33a90a4abf6 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -281,10 +282,22 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr, }); } -void SerializationTuple::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationTuple::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - for (const auto & elem : elems) - elem->enumerateStreams(callback, path); + const auto * type_tuple = type ? &assert_cast(*type) : nullptr; + const auto * column_tuple = column ? &assert_cast(*column) : nullptr; + + for (size_t i = 0; i < elems.size(); ++i) + { + auto next_type = type_tuple ? type_tuple->getElement(i) : nullptr; + auto next_column = column_tuple ? 
column_tuple->getColumnPtr(i) : nullptr; + + elems[i]->enumerateStreams(path, callback, next_type, next_column); + } } struct SerializeBinaryBulkStateTuple : public ISerialization::SerializeBinaryBulkState diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 13668572fff..77f8de90c83 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { @@ -9,7 +9,7 @@ namespace DB class SerializationTuple final : public SimpleTextSerialization { public: - using ElementSerializationPtr = std::shared_ptr; + using ElementSerializationPtr = std::shared_ptr; using ElementSerializations = std::vector; SerializationTuple(const ElementSerializations & elems_, bool have_explicit_names_) @@ -31,7 +31,11 @@ public: /** Each sub-column in a tuple is serialized in separate stream. */ - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/Serializations/SerializationWrapper.cpp b/src/DataTypes/Serializations/SerializationWrapper.cpp index f75c9a1dd8b..c0829ab1b26 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.cpp +++ b/src/DataTypes/Serializations/SerializationWrapper.cpp @@ -4,9 +4,13 @@ namespace DB { -void SerializationWrapper::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const +void SerializationWrapper::enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const { - nested_serialization->enumerateStreams(callback, path); + nested_serialization->enumerateStreams(path, callback, type, column); } void SerializationWrapper::serializeBinaryBulkStatePrefix( diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 399d3b198b3..c48278d53db 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -16,7 +16,11 @@ protected: public: SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} - void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override; + void enumerateStreams( + SubstreamPath & path, + const StreamCallback & callback, + DataTypePtr type, + ColumnPtr column) const override; void serializeBinaryBulkStatePrefix( SerializeBinaryBulkSettings & settings, diff --git a/src/DataTypes/TimezoneMixin.h b/src/DataTypes/TimezoneMixin.h new file mode 100644 index 00000000000..e6e9f7a7989 --- /dev/null +++ b/src/DataTypes/TimezoneMixin.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include + +class DateLUTImpl; + +/** Mixin-class that manages timezone info for timezone-aware DateTime implementations + * + * Must be used as a (second) base for class implementing IDateType/ISerialization-interface. 
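+ *
+ * A minimal usage sketch (the call site below is illustrative only; the constructor shapes are
+ * the ones introduced in this patch):
+ *
+ *     TimezoneMixin tz("Europe/Moscow");          /// explicit time zone; empty name means the server default
+ *     SerializationDateTime s_dt(tz);             /// shares tz.time_zone and tz.utc_time_zone
+ *     SerializationDateTime64 s_dt64(3, tz);      /// scale first, then the mixin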
+ */ +class TimezoneMixin +{ +public: + TimezoneMixin(const TimezoneMixin &) = default; + + explicit TimezoneMixin(const String & time_zone_name = "") + : has_explicit_time_zone(!time_zone_name.empty()) + , time_zone(DateLUT::instance(time_zone_name)) + , utc_time_zone(DateLUT::instance("UTC")) + { + } + + const DateLUTImpl & getTimeZone() const { return time_zone; } + bool hasExplicitTimeZone() const { return has_explicit_time_zone; } + +protected: + /// true if time zone name was provided in data type parameters, false if it's using default time zone. + bool has_explicit_time_zone; + + const DateLUTImpl & time_zone; + const DateLUTImpl & utc_time_zone; +}; diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a950d18b50a..f8d10535be2 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -277,17 +277,18 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible. { UInt32 have_date = type_ids.count(TypeIndex::Date); + UInt32 have_date32 = type_ids.count(TypeIndex::Date32); UInt32 have_datetime = type_ids.count(TypeIndex::DateTime); UInt32 have_datetime64 = type_ids.count(TypeIndex::DateTime64); - if (have_date || have_datetime || have_datetime64) + if (have_date || have_date32 || have_datetime || have_datetime64) { - bool all_date_or_datetime = type_ids.size() == (have_date + have_datetime + have_datetime64); + bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64); if (!all_date_or_datetime) - throw Exception(getExceptionMessagePrefix(types) + " because some of them are Date/DateTime/DateTime64 and some of them are not", + throw Exception(getExceptionMessagePrefix(types) + " because some of them are Date/Date32/DateTime/DateTime64 and some of them are not", ErrorCodes::NO_COMMON_TYPE); - if (have_datetime64 == 0) + if (have_datetime64 == 0 && have_date32 == 0) { for (const auto & type : types) { @@ -298,6 +299,22 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return std::make_shared(); } + /// For Date and Date32, the common type is Date32 + if (have_datetime == 0 && have_datetime64 == 0) + { + for (const auto & type : types) + { + if (isDate32(type)) + return type; + } + } + + /// For Datetime and Date32, the common type is Datetime64 + if (have_datetime == 1 && have_date32 == 1 && have_datetime64 == 0) + { + return std::make_shared(0); + } + UInt8 max_scale = 0; size_t max_scale_date_time_index = 0; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 97e59f53f64..638aef7186c 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -180,6 +180,8 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo if (metadata.sampling_key.definition_ast) storage_ast.set(storage_ast.sample_by, metadata.sampling_key.definition_ast); + else if (storage_ast.sample_by != nullptr) /// SAMPLE BY was removed + storage_ast.sample_by = nullptr; if (metadata.table_ttl.definition_ast) storage_ast.set(storage_ast.ttl_table, metadata.table_ttl.definition_ast); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 60526a1e5b0..21d927dea77 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -4,8 +4,7 @@ #include #include #include -#include -#include +#include #include diff --git 
a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 2377b7da809..71b0c2ec6ea 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -11,9 +11,9 @@ # include # include # include -# include +# include # include -# include +# include # include # include # include diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp index 618f6bf6d34..851c9bc74bd 100644 --- a/src/Databases/MySQL/FetchTablesColumnsList.cpp +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -9,8 +9,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 514978f2456..0facdfc20be 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -5,9 +5,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include #include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 5a714645978..86481d9fd84 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -9,16 +9,14 @@ # include # include # include -# include +# include # include # include # include # include -# include -# include # include # include -# include +# include # include # include # include diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index 0cd0701439f..b8c985915dc 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -8,7 +8,7 @@ # include # include -# include +# include # include # include # include diff --git a/src/Dictionaries/CMakeLists.txt b/src/Dictionaries/CMakeLists.txt index bc5f0dc9567..b1b3d6d55e0 100644 --- a/src/Dictionaries/CMakeLists.txt +++ b/src/Dictionaries/CMakeLists.txt @@ -10,6 +10,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELW set_source_files_properties( FlatDictionary.cpp HashedDictionary.cpp + HashedArrayDictionary.cpp CacheDictionary.cpp RangeHashedDictionary.cpp DirectDictionary.cpp diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 7683f9d4244..f7e9ce6624c 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include namespace ProfileEvents { diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index b563ac797c0..a5a04d277da 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -2,11 +2,11 @@ #include #include #include -#include +#include #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index 6266bd2cf4f..65c40898983 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index cf003dceb8e..cd87cf831a2 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ 
b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -1,7 +1,6 @@ #include "DictionarySourceHelpers.h" #include #include -#include #include #include #include "DictionaryStructure.h" diff --git a/src/Dictionaries/DirectDictionary.cpp b/src/Dictionaries/DirectDictionary.cpp index 03d3b579ec3..f4f33439638 100644 --- a/src/Dictionaries/DirectDictionary.cpp +++ b/src/Dictionaries/DirectDictionary.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/Dictionaries/ExecutableDictionarySource.cpp b/src/Dictionaries/ExecutableDictionarySource.cpp index 91aeda924a1..c09993c2a84 100644 --- a/src/Dictionaries/ExecutableDictionarySource.cpp +++ b/src/Dictionaries/ExecutableDictionarySource.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp index 79e9b627836..dce2ce94b93 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp +++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include @@ -100,7 +100,7 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block) config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, configuration.command_termination_timeout }; auto shell_command = ShellCommand::execute(config); return shell_command; - }, configuration.max_command_execution_time * 1000); + }, configuration.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index b7e8468b815..51215b6311b 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -7,7 +7,7 @@ #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/FileDictionarySource.cpp b/src/Dictionaries/FileDictionarySource.cpp index 8c1f099f344..7fd2dbf80f1 100644 --- a/src/Dictionaries/FileDictionarySource.cpp +++ b/src/Dictionaries/FileDictionarySource.cpp @@ -31,7 +31,7 @@ FileDictionarySource::FileDictionarySource( , context(context_) { auto user_files_path = context->getUserFilesPath(); - if (created_from_ddl && !pathStartsWith(filepath, user_files_path)) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(filepath, user_files_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", filepath, user_files_path); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index e14ee5d30d1..a7cf69bb14d 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include @@ -403,6 +403,11 @@ void FlatDictionary::calculateBytesAllocated() }; callOnDictionaryAttributeType(attribute.type, type_call); + + bytes_allocated += sizeof(attribute.is_nullable_set); + + if (attribute.is_nullable_set.has_value()) + bytes_allocated = attribute.is_nullable_set->getBufferSizeInBytes(); } if (update_field_loaded_block) diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index eb551d9222d..26ebde36f7d 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -1,6 +1,5 @@ #include "HTTPDictionarySource.h" -#include -#include 
+#include #include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp new file mode 100644 index 00000000000..3c02f377c3e --- /dev/null +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -0,0 +1,691 @@ +#include "HashedArrayDictionary.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int DICTIONARY_IS_EMPTY; + extern const int UNSUPPORTED_METHOD; +} + +template +HashedArrayDictionary::HashedArrayDictionary( + const StorageID & dict_id_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const HashedArrayDictionaryStorageConfiguration & configuration_, + BlockPtr update_field_loaded_block_) + : IDictionary(dict_id_) + , dict_struct(dict_struct_) + , source_ptr(std::move(source_ptr_)) + , configuration(configuration_) + , update_field_loaded_block(std::move(update_field_loaded_block_)) +{ + createAttributes(); + loadData(); + calculateBytesAllocated(); +} + +template +ColumnPtr HashedArrayDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types [[maybe_unused]], + const ColumnPtr & default_values_column) const +{ + if (dictionary_key_type == DictionaryKeyType::Complex) + dict_struct.validateKeyTypes(key_types); + + ColumnPtr result; + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); + + const size_t size = extractor.getKeysSize(); + + const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type); + const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; + auto & attribute = attributes[attribute_index]; + + bool is_attribute_nullable = attribute.is_index_null.has_value(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (attribute.is_index_null) + { + col_null_map_to = ColumnUInt8::create(size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnProvider = DictionaryAttributeColumnProvider; + + DictionaryDefaultValueExtractor default_value_extractor(dictionary_attribute.null_value, default_values_column); + + auto column = ColumnProvider::getColumn(dictionary_attribute, size); + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + getItemsImpl( + attribute, + extractor, + [&](const size_t, const Array & value, bool) { out->insert(value); }, + default_value_extractor); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + extractor, + [&](size_t row, const StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_value_extractor); + else + getItemsImpl( + attribute, + extractor, + [&](size_t, const StringRef value, bool) { out->insertData(value.data, value.size); }, + default_value_extractor); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + extractor, + [&](size_t row, const auto value, bool is_null) + { + 
(*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_value_extractor); + else + getItemsImpl( + attribute, + extractor, + [&](size_t row, const auto value, bool) { out[row] = value; }, + default_value_extractor); + } + + result = std::move(column); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (is_attribute_nullable) + result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + + return result; +} + +template +ColumnUInt8::Ptr HashedArrayDictionary::hasKeys(const Columns & key_columns, const DataTypes & key_types) const +{ + if (dictionary_key_type == DictionaryKeyType::Complex) + dict_struct.validateKeyTypes(key_types); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor extractor(key_columns, arena_holder.getComplexKeyArena()); + + size_t keys_size = extractor.getKeysSize(); + + auto result = ColumnUInt8::create(keys_size, false); + auto & out = result->getData(); + + if (attributes.empty()) + { + query_count.fetch_add(keys_size, std::memory_order_relaxed); + return result; + } + + size_t keys_found = 0; + + for (size_t requested_key_index = 0; requested_key_index < keys_size; ++requested_key_index) + { + auto requested_key = extractor.extractCurrentKey(); + + out[requested_key_index] = key_attribute.container.find(requested_key) != key_attribute.container.end(); + + keys_found += out[requested_key_index]; + extractor.rollbackCurrentKey(); + } + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; +} + +template +ColumnPtr HashedArrayDictionary::getHierarchy(ColumnPtr key_column [[maybe_unused]], const DataTypePtr &) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup_storage; + const auto & keys = getColumnVectorData(this, key_column, keys_backup_storage); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + + const auto & key_attribute_container = key_attribute.container; + + const UInt64 null_value = dictionary_attribute.null_value.template get(); + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + + size_t keys_found = 0; + + auto get_parent_func = [&](auto & hierarchy_key) + { + std::optional result; + + auto it = key_attribute_container.find(hierarchy_key); + + if (it != key_attribute_container.end()) + result = parent_keys_container[it->getMapped()]; + + keys_found += result.has_value(); + + return result; + }; + + auto dictionary_hierarchy_array = getKeysHierarchyArray(keys, null_value, is_key_valid_func, get_parent_func); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return dictionary_hierarchy_array; + } + else + { + return nullptr; + } +} + +template +ColumnUInt8::Ptr HashedArrayDictionary::isInHierarchy( + ColumnPtr key_column [[maybe_unused]], + ColumnPtr in_key_column [[maybe_unused]], + const DataTypePtr &) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup_storage; + const auto & keys = getColumnVectorData(this, 
key_column, keys_backup_storage); + + PaddedPODArray keys_in_backup_storage; + const auto & keys_in = getColumnVectorData(this, in_key_column, keys_in_backup_storage); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & dictionary_attribute = dict_struct.attributes[hierarchical_attribute_index]; + auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + + const auto & key_attribute_container = key_attribute.container; + + const UInt64 null_value = dictionary_attribute.null_value.template get(); + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + auto is_key_valid_func = [&](auto & key) { return key_attribute_container.find(key) != key_attribute_container.end(); }; + + size_t keys_found = 0; + + auto get_parent_func = [&](auto & hierarchy_key) + { + std::optional result; + + auto it = key_attribute_container.find(hierarchy_key); + + if (it != key_attribute_container.end()) + result = parent_keys_container[it->getMapped()]; + + keys_found += result.has_value(); + + return result; + }; + + auto result = getKeysIsInHierarchyColumn(keys, keys_in, null_value, is_key_valid_func, get_parent_func); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; + } + else + { + return nullptr; + } +} + +template +ColumnPtr HashedArrayDictionary::getDescendants( + ColumnPtr key_column [[maybe_unused]], + const DataTypePtr &, + size_t level [[maybe_unused]]) const +{ + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + { + PaddedPODArray keys_backup; + const auto & keys = getColumnVectorData(this, key_column, keys_backup); + + size_t hierarchical_attribute_index = *dict_struct.hierarchical_attribute_index; + + const auto & hierarchical_attribute = attributes[hierarchical_attribute_index]; + const AttributeContainerType & parent_keys_container = std::get>(hierarchical_attribute.container); + + const auto & key_attribute_container = key_attribute.container; + + HashMap index_to_key; + index_to_key.reserve(key_attribute.container.size()); + + for (auto & [key, value] : key_attribute_container) + index_to_key[value] = key; + + HashMap> parent_to_child; + + for (size_t i = 0; i < parent_keys_container.size(); ++i) + { + const auto * it = index_to_key.find(i); + if (it == index_to_key.end()) + continue; + + auto parent_key = it->getMapped(); + auto child_key = parent_keys_container[i]; + parent_to_child[parent_key].emplace_back(child_key); + } + + size_t keys_found = 0; + auto result = getKeysDescendantsArray(keys, parent_to_child, level, keys_found); + + query_count.fetch_add(keys.size(), std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + + return result; + } + else + { + return nullptr; + } +} + +template +void HashedArrayDictionary::createAttributes() +{ + const auto size = dict_struct.attributes.size(); + attributes.reserve(size); + + for (const auto & dictionary_attribute : dict_struct.attributes) + { + auto type_call = [&, this](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + auto is_index_null = dictionary_attribute.is_nullable ? std::make_optional>() : std::optional>{}; + std::unique_ptr string_arena = std::is_same_v ? 
std::make_unique() : nullptr; + Attribute attribute{dictionary_attribute.underlying_type, AttributeContainerType(), std::move(is_index_null), std::move(string_arena)}; + attributes.emplace_back(std::move(attribute)); + }; + + callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call); + } +} + +template +void HashedArrayDictionary::updateData() +{ + if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) + { + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + /// We are using this to keep saved data if input stream consists of multiple blocks + if (!update_field_loaded_block) + update_field_loaded_block = std::make_shared(block.cloneEmpty()); + + for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index) + { + const IColumn & update_column = *block.getByPosition(attribute_index).column.get(); + MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable(); + saved_column->insertRangeFrom(update_column, 0, update_column.size()); + } + } + } + else + { + auto pipe = source_ptr->loadUpdatedAll(); + mergeBlockWithPipe( + dict_struct.getKeysSize(), + *update_field_loaded_block, + std::move(pipe)); + } + + if (update_field_loaded_block) + { + resize(update_field_loaded_block->rows()); + blockToAttributes(*update_field_loaded_block.get()); + } +} + +template +void HashedArrayDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +{ + size_t skip_keys_size_offset = dict_struct.getKeysSize(); + + Columns key_columns; + key_columns.reserve(skip_keys_size_offset); + + /// Split into keys columns and attribute columns + for (size_t i = 0; i < skip_keys_size_offset; ++i) + key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + Field column_value_to_insert; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + auto it = key_attribute.container.find(key); + + if (it != key_attribute.container.end()) + { + keys_extractor.rollbackCurrentKey(); + continue; + } + + if constexpr (std::is_same_v) + key = copyKeyInArena(key); + + key_attribute.container.insert({key, element_count}); + + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + const IColumn & attribute_column = *block.safeGetByPosition(skip_keys_size_offset + attribute_index).column; + auto & attribute = attributes[attribute_index]; + bool attribute_is_nullable = attribute.is_index_null.has_value(); + + attribute_column.get(key_index, column_value_to_insert); + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using AttributeValueType = DictionaryValueType; + + auto & attribute_container = std::get>(attribute.container); + attribute_container.emplace_back(); + + if (attribute_is_nullable) + { + attribute.is_index_null->emplace_back(); + + if (column_value_to_insert.isNull()) + { + (*attribute.is_index_null).back() = true; + return; + } + } + + if constexpr (std::is_same_v) + { + String & value_to_insert = column_value_to_insert.get(); + size_t value_to_insert_size = value_to_insert.size(); + + const char * 
string_in_arena = attribute.string_arena->insert(value_to_insert.data(), value_to_insert_size); + + StringRef string_in_arena_reference = StringRef{string_in_arena, value_to_insert_size}; + attribute_container.back() = string_in_arena_reference; + } + else + { + auto value_to_insert = column_value_to_insert.get>(); + attribute_container.back() = value_to_insert; + } + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + } + + ++element_count; + keys_extractor.rollbackCurrentKey(); + } +} + +template +void HashedArrayDictionary::resize(size_t added_rows) +{ + if (unlikely(!added_rows)) + return; + + key_attribute.container.reserve(added_rows); +} + +template +template +void HashedArrayDictionary::getItemsImpl( + const Attribute & attribute, + DictionaryKeysExtractor & keys_extractor, + ValueSetter && set_value [[maybe_unused]], + DefaultValueExtractor & default_value_extractor) const +{ + const auto & key_attribute_container = key_attribute.container; + const auto & attribute_container = std::get>(attribute.container); + const size_t keys_size = keys_extractor.getKeysSize(); + + size_t keys_found = 0; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + const auto it = key_attribute_container.find(key); + + if (it != key_attribute_container.end()) + { + size_t element_index = it->getMapped(); + + const auto & element = attribute_container[element_index]; + + if constexpr (is_nullable) + set_value(key_index, element, (*attribute.is_index_null)[element_index]); + else + set_value(key_index, element, false); + + ++keys_found; + } + else + { + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + } + + keys_extractor.rollbackCurrentKey(); + } + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +template +StringRef HashedArrayDictionary::copyKeyInArena(StringRef key) +{ + size_t key_size = key.size; + char * place_for_key = complex_key_arena.alloc(key_size); + memcpy(reinterpret_cast(place_for_key), reinterpret_cast(key.data), key_size); + StringRef updated_key{place_for_key, key_size}; + return updated_key; +} + +template +void HashedArrayDictionary::loadData() +{ + if (!source_ptr->hasUpdateField()) + { + QueryPipeline pipeline; + pipeline = QueryPipeline(source_ptr->loadAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + resize(block.rows()); + blockToAttributes(block); + } + } + else + { + updateData(); + } + + if (configuration.require_nonempty && 0 == element_count) + throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, + "{}: dictionary source is empty and 'require_nonempty' property is set.", + full_name); +} + +template +void HashedArrayDictionary::calculateBytesAllocated() +{ + bytes_allocated += attributes.size() * sizeof(attributes.front()); + + bytes_allocated += key_attribute.container.size(); + + for (auto & attribute : attributes) + { + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + + const auto & container = std::get>(attribute.container); + bytes_allocated += sizeof(AttributeContainerType); + + if constexpr (std::is_same_v) + { + /// It is not accurate calculations + 
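+                /// (sizeof(Array) counts only the Array objects themselves; heap memory owned by the
+                /// Fields stored inside each Array is not walked, so this is a cheap lower-bound
+                /// estimate rather than an exact figure.)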
bytes_allocated += sizeof(Array) * container.size(); + } + else + { + bytes_allocated += container.allocated_bytes(); + } + + bucket_count = container.capacity(); + + if constexpr (std::is_same_v) + bytes_allocated += sizeof(Arena) + attribute.string_arena->size(); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (attribute.string_arena) + bytes_allocated += attribute.string_arena->size(); + + if (attribute.is_index_null.has_value()) + bytes_allocated += (*attribute.is_index_null).size(); + } + + bytes_allocated += complex_key_arena.size(); + + if (update_field_loaded_block) + bytes_allocated += update_field_loaded_block->allocatedBytes(); +} + +template +Pipe HashedArrayDictionary::read(const Names & column_names, size_t max_block_size) const +{ + PaddedPODArray keys; + keys.reserve(key_attribute.container.size()); + + for (auto & [key, _] : key_attribute.container) + keys.emplace_back(key); + + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); +} + +template class HashedArrayDictionary; +template class HashedArrayDictionary; + +void registerDictionaryArrayHashed(DictionaryFactory & factory) +{ + auto create_layout = [](const std::string & full_name, + const DictionaryStructure & dict_struct, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + DictionarySourcePtr source_ptr, + DictionaryKeyType dictionary_key_type) -> DictionaryPtr + { + if (dictionary_key_type == DictionaryKeyType::Simple && dict_struct.key) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'key' is not supported for simple key hashed array dictionary"); + else if (dictionary_key_type == DictionaryKeyType::Complex && dict_struct.id) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "'id' is not supported for complex key hashed array dictionary"); + + if (dict_struct.range_min || dict_struct.range_max) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "{}: elements .structure.range_min and .structure.range_max should be defined only " + "for a dictionary of layout 'range_hashed'", + full_name); + + const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; + const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); + + HashedArrayDictionaryStorageConfiguration configuration{require_nonempty, dict_lifetime}; + + if (dictionary_key_type == DictionaryKeyType::Simple) + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + else + return std::make_unique>(dict_id, dict_struct, std::move(source_ptr), configuration); + }; + + using namespace std::placeholders; + + factory.registerLayout("hashed_array", + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple); }, false); + factory.registerLayout("complex_key_hashed_array", + [=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex); }, true); +} + +} diff --git a/src/Dictionaries/HashedArrayDictionary.h b/src/Dictionaries/HashedArrayDictionary.h new file mode 100644 index 00000000000..053813bdc44 --- /dev/null +++ b/src/Dictionaries/HashedArrayDictionary.h @@ -0,0 +1,211 @@ +#pragma once + 
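+/// Illustrative DDL for selecting this layout (a sketch only; the layout names "hashed_array" and
+/// "complex_key_hashed_array" are the ones registered in HashedArrayDictionary.cpp, while the rest
+/// of the statement follows the usual dictionary DDL grammar):
+///
+///     CREATE DICTIONARY dict (id UInt64, value String)
+///     PRIMARY KEY id
+///     SOURCE(CLICKHOUSE(TABLE 'dict_source'))
+///     LIFETIME(MIN 0 MAX 1000)
+///     LAYOUT(HASHED_ARRAY())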
+#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +/** This dictionary stores all attributes in arrays. + * Key is stored in hash table and value is index into attribute array. + */ + +namespace DB +{ + +struct HashedArrayDictionaryStorageConfiguration +{ + const bool require_nonempty; + const DictionaryLifetime lifetime; +}; + +template +class HashedArrayDictionary final : public IDictionary +{ +public: + using KeyType = std::conditional_t; + + HashedArrayDictionary( + const StorageID & dict_id_, + const DictionaryStructure & dict_struct_, + DictionarySourcePtr source_ptr_, + const HashedArrayDictionaryStorageConfiguration & configuration_, + BlockPtr update_field_loaded_block_ = nullptr); + + std::string getTypeName() const override + { + if constexpr (dictionary_key_type == DictionaryKeyType::Simple) + return "HashedArray"; + else + return "ComplexHashedArray"; + } + + size_t getBytesAllocated() const override { return bytes_allocated; } + + size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } + + double getFoundRate() const override + { + size_t queries = query_count.load(std::memory_order_relaxed); + if (!queries) + return 0; + return static_cast(found_count.load(std::memory_order_relaxed)) / queries; + } + + double getHitRate() const override { return 1.0; } + + size_t getElementCount() const override { return element_count; } + + double getLoadFactor() const override { return static_cast(element_count) / bucket_count; } + + std::shared_ptr clone() const override + { + return std::make_shared>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); + } + + const IDictionarySource * getSource() const override { return source_ptr.get(); } + + const DictionaryLifetime & getLifetime() const override { return configuration.lifetime; } + + const DictionaryStructure & getStructure() const override { return dict_struct; } + + bool isInjective(const std::string & attribute_name) const override + { + return dict_struct.getAttribute(attribute_name).injective; + } + + DictionaryKeyType getKeyType() const override { return dictionary_key_type; } + + ColumnPtr getColumn( + const std::string& attribute_name, + const DataTypePtr & result_type, + const Columns & key_columns, + const DataTypes & key_types, + const ColumnPtr & default_values_column) const override; + + ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override; + + bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::Simple && dict_struct.hierarchical_attribute_index.has_value(); } + + ColumnPtr getHierarchy(ColumnPtr key_column, const DataTypePtr & hierarchy_attribute_type) const override; + + ColumnUInt8::Ptr isInHierarchy( + ColumnPtr key_column, + ColumnPtr in_key_column, + const DataTypePtr & key_type) const override; + + ColumnPtr getDescendants( + ColumnPtr key_column, + const DataTypePtr & key_type, + size_t level) const override; + + Pipe read(const Names & column_names, size_t max_block_size) const override; + +private: + + using KeyContainerType = std::conditional_t< + dictionary_key_type == DictionaryKeyType::Simple, + HashMap, + HashMapWithSavedHash>>; + + template + using AttributeContainerType = std::conditional_t, std::vector, PaddedPODArray>; + + struct Attribute final + { + AttributeUnderlyingType type; + + std::variant< + AttributeContainerType, + AttributeContainerType, + 
AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType, + AttributeContainerType> + container; + + std::optional> is_index_null; + std::unique_ptr string_arena; + }; + + struct KeyAttribute final + { + + KeyContainerType container; + + }; + + void createAttributes(); + + void blockToAttributes(const Block & block); + + void updateData(); + + void loadData(); + + void calculateBytesAllocated(); + + template + void getItemsImpl( + const Attribute & attribute, + DictionaryKeysExtractor & keys_extractor, + ValueSetter && set_value, + DefaultValueExtractor & default_value_extractor) const; + + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func); + + template + void getAttributeContainer(size_t attribute_index, GetContainerFunc && get_container_func) const; + + void resize(size_t added_rows); + + StringRef copyKeyInArena(StringRef key); + + const DictionaryStructure dict_struct; + const DictionarySourcePtr source_ptr; + const HashedArrayDictionaryStorageConfiguration configuration; + + std::vector attributes; + + KeyAttribute key_attribute; + + size_t bytes_allocated = 0; + size_t element_count = 0; + size_t bucket_count = 0; + mutable std::atomic query_count{0}; + mutable std::atomic found_count{0}; + + BlockPtr update_field_loaded_block; + Arena complex_key_arena; +}; + +extern template class HashedArrayDictionary; +extern template class HashedArrayDictionary; + +} diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 1f3821096da..44a0d8a62c1 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -626,6 +626,11 @@ void HashedDictionary::calculateBytesAllocated() if (attributes[i].string_arena) bytes_allocated += attributes[i].string_arena->size(); + + bytes_allocated += sizeof(attributes[i].is_nullable_set); + + if (attributes[i].is_nullable_set.has_value()) + bytes_allocated = attributes[i].is_nullable_set->getBufferSizeInBytes(); } bytes_allocated += complex_key_arena.size(); @@ -664,10 +669,7 @@ Pipe HashedDictionary::read(const Names & column_na }); } - if constexpr (dictionary_key_type == DictionaryKeyType::Simple) - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); - else - return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), keys, column_names), max_block_size)); + return Pipe(std::make_shared(DictionarySourceData(shared_from_this(), std::move(keys), column_names), max_block_size)); } template @@ -731,8 +733,18 @@ void registerDictionaryHashed(DictionaryFactory & factory) const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false); - const std::string & layout_prefix = sparse ? 
".layout.sparse_hashed" : ".layout.hashed"; - const bool preallocate = config.getBool(config_prefix + layout_prefix + ".preallocate", false); + std::string dictionary_layout_name; + + if (dictionary_key_type == DictionaryKeyType::Simple) + dictionary_layout_name = "hashed"; + else + dictionary_layout_name = "complex_key_hashed"; + + if (sparse) + dictionary_layout_name = "sparse_" + dictionary_layout_name; + + const std::string dictionary_layout_prefix = ".layout." + dictionary_layout_name; + const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false); HashedDictionaryStorageConfiguration configuration{preallocate, require_nonempty, dict_lifetime}; diff --git a/src/Dictionaries/IDictionarySource.h b/src/Dictionaries/IDictionarySource.h index 661f5b8eeb8..5071b69d2bf 100644 --- a/src/Dictionaries/IDictionarySource.h +++ b/src/Dictionaries/IDictionarySource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index 551bb1ee2dd..42683fb884c 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -1,6 +1,5 @@ #include "LibraryDictionarySource.h" -#include #include #include #include @@ -42,13 +41,7 @@ LibraryDictionarySource::LibraryDictionarySource( , context(Context::createCopy(context_)) { auto dictionaries_lib_path = context->getDictionariesLibPath(); - bool path_checked = false; - if (fs::is_symlink(path)) - path_checked = symlinkStartsWith(path, dictionaries_lib_path); - else - path_checked = pathStartsWith(path, dictionaries_lib_path); - - if (created_from_ddl && !path_checked) + if (created_from_ddl && !fileOrSymlinkPathStartsWith(path, dictionaries_lib_path)) throw Exception(ErrorCodes::PATH_ACCESS_DENIED, "File path {} is not inside {}", path, dictionaries_lib_path); if (!fs::exists(path)) diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 5fabe9cf287..f6de6ca0cc1 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -11,8 +11,8 @@ #include "registerDictionaries.h" #include #include -#include -#include +#include +#include #include diff --git a/src/Dictionaries/MySQLDictionarySource.h b/src/Dictionaries/MySQLDictionarySource.h index fa26c2b162a..1ecc41036be 100644 --- a/src/Dictionaries/MySQLDictionarySource.h +++ b/src/Dictionaries/MySQLDictionarySource.h @@ -12,7 +12,7 @@ # include "DictionaryStructure.h" # include "ExternalQueryBuilder.h" # include "IDictionarySource.h" -# include +# include namespace Poco { diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 7a34a9c2b25..0a9ba1f5ea8 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -248,6 +248,9 @@ void IPolygonDictionary::calculateBytesAllocated() { /// Index allocated by subclass not counted because it take a small part in relation to attributes and polygons + if (configuration.store_polygon_key_column) + bytes_allocated += key_attribute_column->allocatedBytes(); + for (const auto & column : attributes_columns) bytes_allocated += column->allocatedBytes(); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index a3324b7d769..c9fb8b86b77 100644 --- 
a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -11,7 +11,7 @@ #include #include "readInvalidateQuery.h" #include -#include +#include #include #endif diff --git a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index ffe0694d8c4..7c53ecc2b2c 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Dictionaries/XDBCDictionarySource.cpp b/src/Dictionaries/XDBCDictionarySource.cpp index 80081e67b42..ba993ec5783 100644 --- a/src/Dictionaries/XDBCDictionarySource.cpp +++ b/src/Dictionaries/XDBCDictionarySource.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -17,7 +16,8 @@ #include "readInvalidateQuery.h" #include "registerDictionaries.h" #include -#include +#include +#include namespace DB diff --git a/src/Dictionaries/readInvalidateQuery.cpp b/src/Dictionaries/readInvalidateQuery.cpp index c2c42eece58..370e1457832 100644 --- a/src/Dictionaries/readInvalidateQuery.cpp +++ b/src/Dictionaries/readInvalidateQuery.cpp @@ -1,5 +1,5 @@ #include "readInvalidateQuery.h" -#include +#include #include #include #include diff --git a/src/Dictionaries/registerDictionaries.cpp b/src/Dictionaries/registerDictionaries.cpp index 8d24a6ea979..df191edd1c3 100644 --- a/src/Dictionaries/registerDictionaries.cpp +++ b/src/Dictionaries/registerDictionaries.cpp @@ -28,6 +28,7 @@ void registerDictionaryComplexKeyHashed(DictionaryFactory & factory); void registerDictionaryTrie(DictionaryFactory & factory); void registerDictionaryFlat(DictionaryFactory & factory); void registerDictionaryHashed(DictionaryFactory & factory); +void registerDictionaryArrayHashed(DictionaryFactory & factory); void registerDictionaryCache(DictionaryFactory & factory); void registerDictionaryPolygon(DictionaryFactory & factory); void registerDictionaryDirect(DictionaryFactory & factory); @@ -60,6 +61,7 @@ void registerDictionaries() registerDictionaryTrie(factory); registerDictionaryFlat(factory); registerDictionaryHashed(factory); + registerDictionaryArrayHashed(factory); registerDictionaryCache(factory); registerDictionaryPolygon(factory); registerDictionaryDirect(factory); diff --git a/src/Formats/CapnProtoUtils.cpp b/src/Formats/CapnProtoUtils.cpp new file mode 100644 index 00000000000..ecfa5df8351 --- /dev/null +++ b/src/Formats/CapnProtoUtils.cpp @@ -0,0 +1,432 @@ +#include + +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_PARSE_CAPN_PROTO_SCHEMA; + extern const int THERE_IS_NO_COLUMN; + extern const int BAD_TYPE_OF_FIELD; + extern const int CAPN_PROTO_BAD_CAST; + extern const int FILE_DOESNT_EXIST; + extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_DATA; +} + +capnp::StructSchema CapnProtoSchemaParser::getMessageSchema(const FormatSchemaInfo & schema_info) +{ + capnp::ParsedSchema schema; + try + { + int fd; + KJ_SYSCALL(fd = open(schema_info.schemaDirectory().data(), O_RDONLY)); + auto schema_dir = kj::newDiskDirectory(kj::OsFileHandle(fd)); + schema = impl.parseFromDirectory(*schema_dir, kj::Path::parse(schema_info.schemaPath()), {}); + } + catch (const kj::Exception & e) + { + /// That's not good to determine the type of error by its description, but + /// this is the only way to do it 
here, because kj doesn't specify the type of error. + auto description = std::string_view(e.getDescription().cStr()); + if (description.find("No such file or directory") != String::npos || description.find("no such directory") != String::npos) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot open CapnProto schema, file {} doesn't exist", schema_info.absoluteSchemaPath()); + + if (description.find("Parse error") != String::npos) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "Cannot parse CapnProto schema {}:{}", schema_info.schemaPath(), e.getLine()); + + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception while parsing CapnProto schema: {}, schema dir and file: {}, {}", description, schema_info.schemaDirectory(), schema_info.schemaPath()); + } + + auto message_maybe = schema.findNested(schema_info.messageName()); + auto * message_schema = kj::_::readMaybe(message_maybe); + if (!message_schema) + throw Exception(ErrorCodes::CANNOT_PARSE_CAPN_PROTO_SCHEMA, "CapnProto schema doesn't contain message with name {}", schema_info.messageName()); + return message_schema->asStruct(); +} + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode) +{ + if (mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE) + return boost::algorithm::to_lower_copy(first) == boost::algorithm::to_lower_copy(second); + return first == second; +} + +static const std::map capnp_simple_type_names = +{ + {capnp::schema::Type::Which::BOOL, "Bool"}, + {capnp::schema::Type::Which::VOID, "Void"}, + {capnp::schema::Type::Which::INT8, "Int8"}, + {capnp::schema::Type::Which::INT16, "Int16"}, + {capnp::schema::Type::Which::INT32, "Int32"}, + {capnp::schema::Type::Which::INT64, "Int64"}, + {capnp::schema::Type::Which::UINT8, "UInt8"}, + {capnp::schema::Type::Which::UINT16, "UInt16"}, + {capnp::schema::Type::Which::UINT32, "UInt32"}, + {capnp::schema::Type::Which::UINT64, "UInt64"}, + {capnp::schema::Type::Which::FLOAT32, "Float32"}, + {capnp::schema::Type::Which::FLOAT64, "Float64"}, + {capnp::schema::Type::Which::TEXT, "Text"}, + {capnp::schema::Type::Which::DATA, "Data"}, + {capnp::schema::Type::Which::INTERFACE, "Interface"}, + {capnp::schema::Type::Which::ANY_POINTER, "AnyPointer"}, +}; + +static bool checkIfStructContainsUnnamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() != struct_schema.getNonUnionFields().size(); +} + +static bool checkIfStructIsNamedUnion(const capnp::StructSchema & struct_schema) +{ + return struct_schema.getFields().size() == struct_schema.getUnionFields().size(); +} + +/// Get full name of type for better exception messages.
+static String getCapnProtoFullTypeName(const capnp::Type & type) +{ + switch (type.which()) + { + case capnp::schema::Type::Which::STRUCT: + { + auto struct_schema = type.asStruct(); + + auto non_union_fields = struct_schema.getNonUnionFields(); + std::vector non_union_field_names; + for (auto nested_field : non_union_fields) + non_union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + auto union_fields = struct_schema.getUnionFields(); + std::vector union_field_names; + for (auto nested_field : union_fields) + union_field_names.push_back(String(nested_field.getProto().getName()) + " " + getCapnProtoFullTypeName(nested_field.getType())); + + String union_name = "Union(" + boost::algorithm::join(union_field_names, ", ") + ")"; + /// Check if the struct is a named union. + if (non_union_field_names.empty()) + return union_name; + + String type_name = "Struct(" + boost::algorithm::join(non_union_field_names, ", "); + /// Check if the struct contains unnamed union. + if (!union_field_names.empty()) + type_name += ", " + union_name; + type_name += ")"; + return type_name; + } + case capnp::schema::Type::Which::LIST: + return "List(" + getCapnProtoFullTypeName(type.asList().getElementType()) + ")"; + case capnp::schema::Type::Which::ENUM: + { + auto enum_schema = type.asEnum(); + String enum_name = "Enum("; + auto enumerants = enum_schema.getEnumerants(); + for (size_t i = 0; i != enumerants.size(); ++i) + { + enum_name += String(enumerants[i].getProto().getName()) + " = " + std::to_string(enumerants[i].getOrdinal()); + if (i + 1 != enumerants.size()) + enum_name += ", "; + } + enum_name += ")"; + return enum_name; + } + default: + auto it = capnp_simple_type_names.find(type.which()); + if (it == capnp_simple_type_names.end()) + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unknown CapnProto type"); + return it->second; + } +} + +template +static bool checkEnums(const capnp::Type & capnp_type, const DataTypePtr column_type, FormatSettings::EnumComparingMode mode, UInt64 max_value, String & error_message) +{ + if (!capnp_type.isEnum()) + return false; + + auto enum_schema = capnp_type.asEnum(); + bool to_lower = mode == FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE; + const auto * enum_type = assert_cast *>(column_type.get()); + const auto & enum_values = dynamic_cast &>(*enum_type); + + auto enumerants = enum_schema.getEnumerants(); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + { + /// In CapnProto Enum fields are numbered sequentially starting from zero. + if (enumerants.size() > max_value) + { + error_message += "Enum from CapnProto schema contains values that are out of range for ClickHouse Enum"; + return false; + } + + auto values = enum_values.getSetOfAllValues(); + std::unordered_set capn_enum_values; + for (auto enumerant : enumerants) + capn_enum_values.insert(Type(enumerant.getOrdinal())); + auto result = values == capn_enum_values; + if (!result) + error_message += "The set of values in Enum from CapnProto schema is different from the set of values in ClickHouse Enum"; + return result; + } + + auto names = enum_values.getSetOfAllNames(to_lower); + std::unordered_set capn_enum_names; + + for (auto enumerant : enumerants) + { + String name = enumerant.getProto().getName(); + capn_enum_names.insert(to_lower ?
boost::algorithm::to_lower_copy(name) : name); + } + + auto result = names == capn_enum_names; + if (!result) + error_message += "The set of names in Enum from CapnProto schema is different from the set of names in ClickHouse Enum"; + return result; +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message); + +static bool checkNullableType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + + /// Check that struct is a named union of type VOID and one arbitrary type. + auto struct_schema = capnp_type.asStruct(); + if (!checkIfStructIsNamedUnion(struct_schema)) + return false; + + auto union_fields = struct_schema.getUnionFields(); + if (union_fields.size() != 2) + return false; + + auto first = union_fields[0]; + auto second = union_fields[1]; + + auto nested_type = assert_cast(data_type.get())->getNestedType(); + if (first.getType().isVoid()) + return checkCapnProtoType(second.getType(), nested_type, mode, error_message); + if (second.getType().isVoid()) + return checkCapnProtoType(first.getType(), nested_type, mode, error_message); + return false; +} + +static bool checkTupleType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isStruct()) + return false; + auto struct_schema = capnp_type.asStruct(); + + if (checkIfStructIsNamedUnion(struct_schema)) + return false; + + if (checkIfStructContainsUnnamedUnion(struct_schema)) + { + error_message += "CapnProto struct contains unnamed union"; + return false; + } + + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + if (nested_types.size() != struct_schema.getFields().size()) + { + error_message += "Tuple and Struct types have different sizes"; + return false; + } + + if (!tuple_data_type->haveExplicitNames()) + { + error_message += "Only named Tuple can be converted to CapnProto Struct"; + return false; + } + for (const auto & name : tuple_data_type->getElementNames()) + { + KJ_IF_MAYBE(field, struct_schema.findFieldByName(name)) + { + if (!checkCapnProtoType(field->getType(), nested_types[tuple_data_type->getPositionByName(name)], mode, error_message)) + return false; + } + else + { + error_message += "CapnProto struct doesn't contain a field with name " + name; + return false; + } + } + + return true; +} + +static bool checkArrayType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + if (!capnp_type.isList()) + return false; + auto list_schema = capnp_type.asList(); + auto nested_type = assert_cast(data_type.get())->getNestedType(); + return checkCapnProtoType(list_schema.getElementType(), nested_type, mode, error_message); +} + +static bool checkCapnProtoType(const capnp::Type & capnp_type, const DataTypePtr & data_type, FormatSettings::EnumComparingMode mode, String & error_message) +{ + switch (data_type->getTypeId()) + { + case TypeIndex::UInt8: + return capnp_type.isBool() || capnp_type.isUInt8(); + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + return capnp_type.isUInt16(); + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + return capnp_type.isUInt32(); + case TypeIndex::UInt64: + return capnp_type.isUInt64(); + case 
TypeIndex::Int8: + return capnp_type.isInt8(); + case TypeIndex::Int16: + return capnp_type.isInt16(); + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + return capnp_type.isInt32(); + case TypeIndex::DateTime64: [[fallthrough]]; + case TypeIndex::Int64: + return capnp_type.isInt64(); + case TypeIndex::Float32: + return capnp_type.isFloat32(); + case TypeIndex::Float64: + return capnp_type.isFloat64(); + case TypeIndex::Enum8: + return checkEnums(capnp_type, data_type, mode, INT8_MAX, error_message); + case TypeIndex::Enum16: + return checkEnums(capnp_type, data_type, mode, INT16_MAX, error_message); + case TypeIndex::Tuple: + return checkTupleType(capnp_type, data_type, mode, error_message); + case TypeIndex::Nullable: + { + auto result = checkNullableType(capnp_type, data_type, mode, error_message); + if (!result) + error_message += "Nullable can be represented only as a named union of type Void and nested type"; + return result; + } + case TypeIndex::Array: + return checkArrayType(capnp_type, data_type, mode, error_message); + case TypeIndex::LowCardinality: + return checkCapnProtoType(capnp_type, assert_cast(data_type.get())->getDictionaryType(), mode, error_message); + case TypeIndex::FixedString: [[fallthrough]]; + case TypeIndex::String: + return capnp_type.isText() || capnp_type.isData(); + default: + return false; + } +} + +static std::pair splitFieldName(const String & name) +{ + const auto * begin = name.data(); + const auto * end = name.data() + name.size(); + const auto * it = find_first_symbols<'_', '.'>(begin, end); + String first = String(begin, it); + String second = it == end ? "" : String(it + 1, end); + return {first, second}; +} + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_reader.getSchema().findFieldByName(field_name)) + { + capnp::DynamicValue::Reader field_reader; + try + { + field_reader = struct_reader.get(*field); + } + catch (const kj::Exception & e) + { + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot extract field value from struct by provided schema, error: {} Perhaps the data was generated by another schema", String(e.getDescription().cStr())); + } + + if (nested_name.empty()) + return field_reader; + + if (field_reader.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getReaderByColumnName(field_reader.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name) +{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, struct_builder.getSchema().findFieldByName(field_name)) + { + if (nested_name.empty()) + return {struct_builder, *field}; + + auto field_builder = struct_builder.get(*field); + if (field_builder.getType() != capnp::DynamicValue::STRUCT) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getStructBuilderAndFieldByColumnName(field_builder.as(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto struct doesn't contain field with name {}", field_name); +} + +static capnp::StructSchema::Field getFieldByName(const capnp::StructSchema & schema, const String & name) 
+{ + auto [field_name, nested_name] = splitFieldName(name); + KJ_IF_MAYBE(field, schema.findFieldByName(field_name)) + { + if (nested_name.empty()) + return *field; + + if (!field->getType().isStruct()) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Field {} is not a struct", field_name); + + return getFieldByName(field->getType().asStruct(), nested_name); + } + + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Capnproto schema doesn't contain field with name {}", field_name); +} + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode) +{ + /// Firstly check that struct doesn't contain unnamed union, because we don't support it. + if (checkIfStructContainsUnnamedUnion(schema)) + throw Exception(ErrorCodes::CAPN_PROTO_BAD_CAST, "Schema contains unnamed union that is not supported"); + auto names_and_types = header.getNamesAndTypesList(); + String additional_error_message; + for (auto & [name, type] : names_and_types) + { + auto field = getFieldByName(schema, name); + if (!checkCapnProtoType(field.getType(), type, mode, additional_error_message)) + { + auto e = Exception( + ErrorCodes::CAPN_PROTO_BAD_CAST, + "Cannot convert ClickHouse type {} to CapnProto type {}", + type->getName(), + getCapnProtoFullTypeName(field.getType())); + if (!additional_error_message.empty()) + e.addMessage(additional_error_message); + throw std::move(e); + } + } +} + +} + +#endif diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h new file mode 100644 index 00000000000..93ca0a5e616 --- /dev/null +++ b/src/Formats/CapnProtoUtils.h @@ -0,0 +1,43 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include + +namespace DB +{ +// Wrapper for classes that could throw in destructor +// https://github.com/capnproto/capnproto/issues/553 +template +struct DestructorCatcher +{ + T impl; + template + DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} + ~DestructorCatcher() noexcept try { } catch (...) 
{ return; } +}; + +class CapnProtoSchemaParser : public DestructorCatcher +{ +public: + CapnProtoSchemaParser() {} + + capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); +}; + +bool compareEnumNames(const String & first, const String & second, FormatSettings::EnumComparingMode mode); + +std::pair getStructBuilderAndFieldByColumnName(capnp::DynamicStruct::Builder struct_builder, const String & name); + +capnp::DynamicValue::Reader getReaderByColumnName(const capnp::DynamicStruct::Reader & struct_reader, const String & name); + +void checkCapnProtoSchemaStructure(const capnp::StructSchema & schema, const Block & header, FormatSettings::EnumComparingMode mode); + +} + +#endif diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d3ff5cbf8a7..d2dc18a03fd 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -4,17 +4,16 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include #include +#include #include namespace DB @@ -112,6 +111,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d816ef1d016..ee3824081bb 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -1,10 +1,9 @@ #pragma once +#include #include -#include #include #include -#include #include #include @@ -35,12 +34,15 @@ struct RowOutputFormatParams; using InputFormatPtr = std::shared_ptr; using OutputFormatPtr = std::shared_ptr; +template +struct Memory; + FormatSettings getFormatSettings(ContextPtr context); template FormatSettings getFormatSettings(ContextPtr context, const T & settings); -/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. +/** Allows to create an IInputFormat or IOutputFormat by the name of the format. * Note: format and compression are independent things. */ class FormatFactory final : private boost::noncopyable @@ -56,7 +58,7 @@ public: */ using FileSegmentationEngine = std::function( ReadBuffer & buf, - DB::Memory<> & memory, + DB::Memory> & memory, size_t min_chunk_bytes)>; /// This callback allows to perform some additional actions after writing a single row. 
diff --git a/src/Formats/FormatSchemaInfo.cpp b/src/Formats/FormatSchemaInfo.cpp index 2605c0bdf04..24c8dfc14f2 100644 --- a/src/Formats/FormatSchemaInfo.cpp +++ b/src/Formats/FormatSchemaInfo.cpp @@ -99,4 +99,10 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String & } } +FormatSchemaInfo::FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message) + : FormatSchemaInfo( + settings.schema.format_schema, format, require_message, settings.schema.is_server, settings.schema.format_schema_path) +{ +} + } diff --git a/src/Formats/FormatSchemaInfo.h b/src/Formats/FormatSchemaInfo.h index cb041e02116..8c430218af0 100644 --- a/src/Formats/FormatSchemaInfo.h +++ b/src/Formats/FormatSchemaInfo.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { @@ -11,6 +12,7 @@ class FormatSchemaInfo { public: FormatSchemaInfo(const String & format_schema, const String & format, bool require_message, bool is_server, const std::string & format_schema_path); + FormatSchemaInfo(const FormatSettings & settings, const String & format, bool require_message); /// Returns path to the schema file. const String & schemaPath() const { return schema_path; } diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8c894c77e82..403ccbc6763 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -183,6 +183,20 @@ struct FormatSettings { bool import_nested = false; } orc; + + /// For capnProto format we should determine how to + /// compare ClickHouse Enum and Enum from schema. + enum class EnumComparingMode + { + BY_NAMES, // Names in enums should be the same, values can be different. + BY_NAMES_CASE_INSENSITIVE, // Case-insensitive name comparison. + BY_VALUES, // Values should be the same, names can be different. 
+ }; + + struct + { + EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES; + } capn_proto; }; } diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index b918825df79..8ef05fa584e 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 79dd6c6c192..2d2d4ad5531 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -1,5 +1,9 @@ #pragma once +#include +#include +#include + namespace DB { diff --git a/src/DataStreams/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h similarity index 100% rename from src/DataStreams/MarkInCompressedFile.h rename to src/Formats/MarkInCompressedFile.h diff --git a/src/DataStreams/NativeReader.cpp b/src/Formats/NativeReader.cpp similarity index 98% rename from src/DataStreams/NativeReader.cpp rename to src/Formats/NativeReader.cpp index 079dff80eae..9ef248dc904 100644 --- a/src/DataStreams/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include @@ -56,7 +56,6 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_, } } -// also resets few vars from IBlockInputStream (I didn't want to propagate resetParser upthere) void NativeReader::resetParser() { istr_concrete = nullptr; diff --git a/src/DataStreams/NativeReader.h b/src/Formats/NativeReader.h similarity index 96% rename from src/DataStreams/NativeReader.h rename to src/Formats/NativeReader.h index cfd58bde2cc..49c2db7703f 100644 --- a/src/DataStreams/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -1,8 +1,8 @@ #pragma once -#include -#include +#include #include +#include namespace DB { diff --git a/src/DataStreams/NativeWriter.cpp b/src/Formats/NativeWriter.cpp similarity index 98% rename from src/DataStreams/NativeWriter.cpp rename to src/Formats/NativeWriter.cpp index 6e26c443e29..9da0c312362 100644 --- a/src/DataStreams/NativeWriter.cpp +++ b/src/Formats/NativeWriter.cpp @@ -5,8 +5,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/DataStreams/NativeWriter.h b/src/Formats/NativeWriter.h similarity index 100% rename from src/DataStreams/NativeWriter.h rename to src/Formats/NativeWriter.h diff --git a/src/DataStreams/TemporaryFileStream.cpp b/src/Formats/TemporaryFileStream.cpp similarity index 66% rename from src/DataStreams/TemporaryFileStream.cpp rename to src/Formats/TemporaryFileStream.cpp index 826cf5508d8..b19c4aeff35 100644 --- a/src/DataStreams/TemporaryFileStream.cpp +++ b/src/Formats/TemporaryFileStream.cpp @@ -1,8 +1,6 @@ -#include -#include -#include -#include -#include +#include +#include +#include #include #include #include @@ -43,27 +41,4 @@ void TemporaryFileStream::write(const std::string & path, const Block & header, compressed_buf.finalize(); } -TemporaryFileLazySource::TemporaryFileLazySource(const std::string & path_, const Block & header_) - : ISource(header_) - , path(path_) - , done(false) -{} - -Chunk TemporaryFileLazySource::generate() -{ - if (done) - return {}; - - if (!stream) - stream = std::make_unique(path, header); - - auto block = stream->block_in->read(); - if (!block) - { - done = true; - stream.reset(); - } - return Chunk(block.getColumns(), block.rows()); -} - } diff --git a/src/DataStreams/TemporaryFileStream.h b/src/Formats/TemporaryFileStream.h similarity index 57% rename from 
src/DataStreams/TemporaryFileStream.h rename to src/Formats/TemporaryFileStream.h index c0c13605928..4a2aa2d55e0 100644 --- a/src/DataStreams/TemporaryFileStream.h +++ b/src/Formats/TemporaryFileStream.h @@ -1,11 +1,10 @@ #pragma once #include -#include +#include #include #include -#include -#include +#include namespace DB { @@ -24,22 +23,4 @@ struct TemporaryFileStream static void write(const std::string & path, const Block & header, QueryPipelineBuilder builder, const std::string & codec); }; - -class TemporaryFileLazySource : public ISource -{ -public: - TemporaryFileLazySource(const std::string & path_, const Block & header_); - String getName() const override { return "TemporaryFileLazySource"; } - -protected: - Chunk generate() override; - -private: - const std::string path; - Block header; - bool done; - - std::unique_ptr stream; -}; - } diff --git a/src/DataStreams/formatBlock.cpp b/src/Formats/formatBlock.cpp similarity index 86% rename from src/DataStreams/formatBlock.cpp rename to src/Formats/formatBlock.cpp index dab321be2e1..d2b401207aa 100644 --- a/src/DataStreams/formatBlock.cpp +++ b/src/Formats/formatBlock.cpp @@ -1,8 +1,8 @@ #include -#include +#include #include #include -#include +#include #include namespace DB diff --git a/src/DataStreams/formatBlock.h b/src/Formats/formatBlock.h similarity index 100% rename from src/DataStreams/formatBlock.h rename to src/Formats/formatBlock.h diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 3e4c0366e8a..acaf6f28492 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -67,6 +67,7 @@ void registerOutputFormatNull(FormatFactory & factory); void registerOutputFormatMySQLWire(FormatFactory & factory); void registerOutputFormatMarkdown(FormatFactory & factory); void registerOutputFormatPostgreSQLWire(FormatFactory & factory); +void registerOutputFormatCapnProto(FormatFactory & factory); /// Input only formats. 
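For context on the registration changes around registerOutputFormatCapnProto above and the registerFormats() calls in the next hunk: each format ships a free registration function, and registerFormats() invokes every one of them against a shared factory at startup. Below is a simplified standalone sketch of that registration pattern with invented names (MiniFormatFactory, Formatter, registerOutputFormatExample); it is not the real FormatFactory API.

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <utility>

/// Invented placeholder for whatever a creator function produces.
struct Formatter { std::string name; };

class MiniFormatFactory
{
public:
    using Creator = std::function<Formatter()>;

    /// Store a creator under the format's name.
    void registerOutputFormat(const std::string & name, Creator creator)
    {
        creators[name] = std::move(creator);
    }

    /// Look up the creator by name and build the formatter.
    Formatter create(const std::string & name) const
    {
        return creators.at(name)();
    }

private:
    std::map<std::string, Creator> creators;
};

/// Each format contributes one registration function, mirroring how a
/// register*Format(factory) declaration is added and then called once
/// from a central registerFormats()-style function.
void registerOutputFormatExample(MiniFormatFactory & factory)
{
    factory.registerOutputFormat("Example", [] { return Formatter{"Example"}; });
}

int main()
{
    MiniFormatFactory factory;
    registerOutputFormatExample(factory);
    std::cout << factory.create("Example").name << '\n';
}

Under this pattern, adding a new format is one declaration plus one call at the registration site, which is all the hunks above and below change.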
@@ -139,6 +140,7 @@ void registerFormats() registerOutputFormatMySQLWire(factory); registerOutputFormatMarkdown(factory); registerOutputFormatPostgreSQLWire(factory); + registerOutputFormatCapnProto(factory); registerInputFormatRegexp(factory); registerInputFormatJSONAsString(factory); diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index d59ead2ebf5..4c27f090fb2 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -50,36 +50,33 @@ public: throw Exception{"JSONPath functions require at least 2 arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; } - const auto & first_column = arguments[0]; + const auto & json_column = arguments[0]; - /// Check 1 argument: must be of type String (JSONPath) - if (!isString(first_column.type)) + if (!isString(json_column.type)) { throw Exception( - "JSONPath functions require 1 argument to be JSONPath of type string, illegal type: " + first_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - /// Check 1 argument: must be const (JSONPath) - if (!isColumnConst(*first_column.column)) - { - throw Exception("1 argument (JSONPath) must be const", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - - const auto & second_column = arguments[1]; - - /// Check 2 argument: must be of type String (JSON) - if (!isString(second_column.type)) - { - throw Exception( - "JSONPath functions require 2 argument to be JSON of string, illegal type: " + second_column.type->getName(), + "JSONPath functions require first argument to be JSON of string, illegal type: " + json_column.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - const ColumnPtr & arg_jsonpath = first_column.column; + const auto & json_path_column = arguments[1]; + + if (!isString(json_path_column.type)) + { + throw Exception( + "JSONPath functions require second argument to be JSONPath of type string, illegal type: " + json_path_column.type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + if (!isColumnConst(*json_path_column.column)) + { + throw Exception("Second argument (JSONPath) must be constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + const ColumnPtr & arg_jsonpath = json_path_column.column; const auto * arg_jsonpath_const = typeid_cast(arg_jsonpath.get()); const auto * arg_jsonpath_string = typeid_cast(arg_jsonpath_const->getDataColumnPtr().get()); - const ColumnPtr & arg_json = second_column.column; + const ColumnPtr & arg_json = json_column.column; const auto * col_json_const = typeid_cast(arg_json.get()); const auto * col_json_string = typeid_cast(col_json_const ? 
col_json_const->getDataColumnPtr().get() : arg_json.get()); @@ -152,7 +149,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index 31356deb3fe..945090781dc 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1088,7 +1088,7 @@ public: if (!((both_represented_by_number && !has_date) /// Do not allow to compare date and number. || (left.isStringOrFixedString() || right.isStringOrFixedString()) /// Everything can be compared with string by conversion. /// You can compare the date, datetime, or datatime64 and an enumeration with a constant string. - || ((left.isDate() || left.isDateTime() || left.isDateTime64()) && (right.isDate() || right.isDateTime() || right.isDateTime64()) && left.idx == right.idx) /// only date vs date, or datetime vs datetime + || ((left.isDate() || left.isDate32() || left.isDateTime() || left.isDateTime64()) && (right.isDate() || right.isDate32() || right.isDateTime() || right.isDateTime64()) && left.idx == right.idx) /// only date vs date, or datetime vs datetime || (left.isUUID() && right.isUUID()) || (left.isEnum() && right.isEnum() && arguments[0]->getName() == arguments[1]->getName()) /// only equivalent enum type values can be compared against || (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size()) @@ -1178,8 +1178,8 @@ public: const bool left_is_string = isStringOrFixedString(which_left); const bool right_is_string = isStringOrFixedString(which_right); - bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDateTime() || which_left.isDateTime64()) - && (which_right.isDate() || which_right.isDateTime() || which_right.isDateTime64()); + bool date_and_datetime = (which_left.idx != which_right.idx) && (which_left.isDate() || which_left.isDate32() || which_left.isDateTime() || which_left.isDateTime64()) + && (which_right.isDate() || which_right.isDate32() || which_right.isDateTime() || which_right.isDateTime64()); ColumnPtr res; if (left_is_num && right_is_num && !date_and_datetime) @@ -1222,8 +1222,8 @@ public: } else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))) { - // Comparing Date and DateTime64 requires implicit conversion, - if (date_and_datetime && (isDate(left_type) || isDate(right_type))) + // Comparing Date/Date32 and DateTime64 requires implicit conversion, + if (date_and_datetime && (isDateOrDate32(left_type) || isDateOrDate32(right_type))) { DataTypePtr common_type = getLeastSupertype({left_type, right_type}); ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type); @@ -1247,8 +1247,10 @@ public: ColumnPtr c0_converted = castColumn(col_with_type_and_name_left, common_type); ColumnPtr c1_converted = castColumn(col_with_type_and_name_right, common_type); if (!((res = executeNumLeftType(c0_converted.get(), c1_converted.get())) - || (res = 
executeNumLeftType(c0_converted.get(), c1_converted.get())))) - throw Exception("Date related common types can only be UInt32 or UInt64", ErrorCodes::LOGICAL_ERROR); + || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())) + || (res = executeNumLeftType(c0_converted.get(), c1_converted.get())) + || (res = executeDecimal({c0_converted, common_type, "left"}, {c1_converted, common_type, "right"})))) + throw Exception("Date related common types can only be UInt32/UInt64/Int32/Decimal", ErrorCodes::LOGICAL_ERROR); return res; } else if (left_type->equals(*right_type)) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 95db7a9af25..9238cc81c37 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -301,7 +301,7 @@ struct ToDateTimeImpl return time_zone.fromDayNum(DayNum(d)); } - static inline UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.fromDayNum(ExtendedDayNum(d)); } @@ -638,7 +638,7 @@ struct ToDateTime64Transform inline DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const { const auto dt = ToDateTimeImpl::execute(d, time_zone); - return execute(dt, time_zone); + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); } inline DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 6889a0d44b9..e861e99861b 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -38,7 +39,6 @@ #include #include #include -#include #include @@ -58,6 +58,11 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +concept HasIndexOperator = requires (T t) +{ + t[0]; +}; /// Functions to parse JSONs and extract values from it. /// The first argument of all these functions gets a JSON, @@ -279,7 +284,7 @@ private: return true; } - if constexpr (FunctionJSONHelpersDetails::has_index_operator::value) + if constexpr (HasIndexOperator) { if (element.isObject()) { @@ -739,6 +744,8 @@ public: } }; +template +class JSONExtractRawImpl; template class JSONExtractStringImpl @@ -755,9 +762,12 @@ public: static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &) { - if (!element.isString()) + if (element.isNull()) return false; + if (!element.isString()) + return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); + auto str = element.getString(); ColumnString & col_str = assert_cast(dest); col_str.insertData(str.data(), str.size()); @@ -765,9 +775,6 @@ public: } }; -template -class JSONExtractRawImpl; - /// Nodes of the extract tree. We need the extract tree to extract from JSON complex values containing array, tuples or nullables. 
template struct JSONExtractTree @@ -851,12 +858,7 @@ struct JSONExtractTree public: bool insertResultToColumn(IColumn & dest, const Element & element) override { - if (element.isString()) - return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); - else if (element.isNull()) - return false; - else - return JSONExtractRawImpl::insertResultToColumn(dest, element, {}); + return JSONExtractStringImpl::insertResultToColumn(dest, element, {}); } }; diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h deleted file mode 100644 index 4ef43eb637f..00000000000 --- a/src/Functions/FunctionsJSON.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -namespace FunctionJSONHelpersDetails -{ - template - struct has_index_operator : std::false_type {}; - - template - struct has_index_operator()[0])>> : std::true_type {}; -} - -} diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 9ba7e822bfe..46ebab7f5fe 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -96,6 +96,9 @@ struct ReplaceRegexpImpl re2_st::StringPiece matches[max_captures]; size_t start_pos = 0; + bool is_first_match = true; + bool is_start_pos_added_one = false; + while (start_pos < static_cast(input.length())) { /// If no more replacements possible for current string @@ -103,6 +106,9 @@ struct ReplaceRegexpImpl if (searcher.Match(input, start_pos, input.length(), re2_st::RE2::Anchor::UNANCHORED, matches, num_captures)) { + if (is_start_pos_added_one) + start_pos -= 1; + const auto & match = matches[0]; size_t bytes_to_copy = (match.data() - input.data()) - start_pos; @@ -112,6 +118,13 @@ struct ReplaceRegexpImpl res_offset += bytes_to_copy; start_pos += bytes_to_copy + match.length(); + /// To avoid infinite loop. + if (is_first_match && match.length() == 0 && !replace_one && input.length() > 1) + { + start_pos += 1; + is_start_pos_added_one = true; + } + /// Do substitution instructions for (const auto & it : instructions) { @@ -129,8 +142,9 @@ struct ReplaceRegexpImpl } } - if (replace_one || match.length() == 0) /// Stop after match of zero length, to avoid infinite loop. 
+ if (replace_one || (!is_first_match && match.length() == 0)) can_finish_current_string = true; + is_first_match = false; } else can_finish_current_string = true; diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index 20341cbe1dc..ee9696cf34f 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index e8bd136e704..02db90bfc43 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -40,6 +40,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0}; } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override; diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 1122d4892c6..3e5242d5f9b 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -124,8 +124,8 @@ public: */ struct Instruction { - const IColumn * condition = nullptr; - const IColumn * source = nullptr; + IColumn::Ptr condition = nullptr; + IColumn::Ptr source = nullptr; bool condition_always_true = false; bool condition_is_nullable = false; @@ -160,15 +160,15 @@ public: } else { - const ColumnWithTypeAndName & cond_col = arguments[i]; + IColumn::Ptr cond_col = arguments[i].column->convertToFullColumnIfLowCardinality(); /// We skip branches that are always false. /// If we encounter a branch that is always true, we can finish. - if (cond_col.column->onlyNull()) + if (cond_col->onlyNull()) continue; - if (const auto * column_const = checkAndGetColumn(*cond_col.column)) + if (const auto * column_const = checkAndGetColumn(*cond_col)) { Field value = column_const->getField(); @@ -181,26 +181,24 @@ public: } else { - if (isColumnNullable(*cond_col.column)) - instruction.condition_is_nullable = true; - - instruction.condition = cond_col.column.get(); + instruction.condition = cond_col; + instruction.condition_is_nullable = instruction.condition->isNullable(); } - instruction.condition_is_short = cond_col.column->size() < arguments[0].column->size(); + instruction.condition_is_short = cond_col->size() < arguments[0].column->size(); } const ColumnWithTypeAndName & source_col = arguments[source_idx]; instruction.source_is_short = source_col.column->size() < arguments[0].column->size(); if (source_col.type->equals(*return_type)) { - instruction.source = source_col.column.get(); + instruction.source = source_col.column; } else { /// Cast all columns to result type. 
converted_columns_holder.emplace_back(castColumn(source_col, return_type)); - instruction.source = converted_columns_holder.back().get(); + instruction.source = converted_columns_holder.back(); } if (instruction.source && isColumnConst(*instruction.source)) diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 14e12fb310c..c3ae6516e0f 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -18,10 +18,10 @@ namespace ErrorCodes template -class FunctionReadWkt : public IFunction +class FunctionReadWKT : public IFunction { public: - explicit FunctionReadWkt() = default; + explicit FunctionReadWKT() = default; static constexpr const char * name = NameHolder::name; @@ -72,36 +72,36 @@ public: static FunctionPtr create(ContextPtr) { - return std::make_shared>(); + return std::make_shared>(); } }; -struct ReadWktPointNameHolder +struct ReadWKTPointNameHolder { - static constexpr const char * name = "readWktPoint"; + static constexpr const char * name = "readWKTPoint"; }; -struct ReadWktRingNameHolder +struct ReadWKTRingNameHolder { - static constexpr const char * name = "readWktRing"; + static constexpr const char * name = "readWKTRing"; }; -struct ReadWktPolygonNameHolder +struct ReadWKTPolygonNameHolder { - static constexpr const char * name = "readWktPolygon"; + static constexpr const char * name = "readWKTPolygon"; }; -struct ReadWktMultiPolygonNameHolder +struct ReadWKTMultiPolygonNameHolder { - static constexpr const char * name = "readWktMultiPolygon"; + static constexpr const char * name = "readWKTMultiPolygon"; }; -void registerFunctionReadWkt(FunctionFactory & factory) +void registerFunctionReadWKT(FunctionFactory & factory) { - factory.registerFunction, ReadWktPointNameHolder>>(); - factory.registerFunction, ReadWktRingNameHolder>>(); - factory.registerFunction, ReadWktPolygonNameHolder>>(); - factory.registerFunction, ReadWktMultiPolygonNameHolder>>(); + factory.registerFunction, ReadWKTPointNameHolder>>(); + factory.registerFunction, ReadWKTRingNameHolder>>(); + factory.registerFunction, ReadWKTPolygonNameHolder>>(); + factory.registerFunction, ReadWKTMultiPolygonNameHolder>>(); } } diff --git a/src/Functions/registerFunctionsGeo.cpp b/src/Functions/registerFunctionsGeo.cpp index a0ae38f6b85..fd55c9cc20a 100644 --- a/src/Functions/registerFunctionsGeo.cpp +++ b/src/Functions/registerFunctionsGeo.cpp @@ -23,7 +23,7 @@ void registerFunctionGeohashEncode(FunctionFactory & factory); void registerFunctionGeohashDecode(FunctionFactory & factory); void registerFunctionGeohashesInBox(FunctionFactory & factory); void registerFunctionWkt(FunctionFactory & factory); -void registerFunctionReadWkt(FunctionFactory & factory); +void registerFunctionReadWKT(FunctionFactory & factory); void registerFunctionSvg(FunctionFactory & factory); #if USE_H3 @@ -79,7 +79,7 @@ void registerFunctionsGeo(FunctionFactory & factory) registerFunctionGeohashDecode(factory); registerFunctionGeohashesInBox(factory); registerFunctionWkt(factory); - registerFunctionReadWkt(factory); + registerFunctionReadWKT(factory); registerFunctionSvg(factory); #if USE_H3 diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index 4495e668add..b3a89c0393c 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -102,6 +102,7 @@ public: void registerFunctionSvg(FunctionFactory & factory) { factory.registerFunction(); + factory.registerAlias("SVG", "svg"); } } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.h b/src/IO/AsynchronousReadBufferFromFileDescriptor.h index 
c64341089d0..50d8f5819fe 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.h +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.h @@ -40,6 +40,7 @@ public: : ReadBufferFromFileBase(buf_size, existing_memory, alignment), reader(std::move(reader_)), priority(priority_), required_alignment(alignment), fd(fd_) { + prefetch_buffer.alignment = alignment; } ~AsynchronousReadBufferFromFileDescriptor() override; diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index bcc36305021..d15297637a3 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -7,17 +7,6 @@ #include #include -#if defined(__OpenBSD__) || defined(__FreeBSD__) || defined (__ANDROID__) -# include -#elif defined(__sun) -# include -#elif defined(__APPLE__) -# include - -# define htobe64(x) OSSwapHostToBigInt64(x) -# define be64toh(x) OSSwapBigToHostInt64(x) -#endif - namespace DB { @@ -152,7 +141,7 @@ private: memcpy(&tmp_buffer, source_current, bytes_to_read); source_current += bytes_to_read; - tmp_buffer = be64toh(tmp_buffer); + tmp_buffer = __builtin_bswap64(tmp_buffer); bits_buffer |= BufferType(tmp_buffer) << ((sizeof(BufferType) - sizeof(tmp_buffer)) * 8 - bits_count); bits_count += static_cast(bytes_to_read) * 8; @@ -200,7 +189,7 @@ public: capacity = BIT_BUFFER_SIZE - bits_count; } -// write low bits of value as high bits of bits_buffer + // write low bits of value as high bits of bits_buffer const UInt64 mask = maskLowBits(bits_to_write); BufferType v = value & mask; v <<= capacity - bits_to_write; @@ -212,7 +201,7 @@ public: // flush contents of bits_buffer to the dest_current, partial bytes are completed with zeroes. inline void flush() { - bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align UP to 8-bytes, so doFlush will write ALL data from bits_buffer + bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align up to 8-bytes, so doFlush will write all data from bits_buffer while (bits_count != 0) doFlush(); } @@ -231,13 +220,12 @@ private: if (available < to_write) { - throw Exception("Can not write past end of buffer. Space available " - + std::to_string(available) + " bytes, required to write: " - + std::to_string(to_write) + ".", - ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); + throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, + "Can not write past end of buffer. 
Space available {} bytes, required to write {} bytes.", + available, to_write); } - const auto tmp_buffer = htobe64(static_cast(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8)); + const auto tmp_buffer = __builtin_bswap64(static_cast(bits_buffer >> (sizeof(bits_buffer) - sizeof(UInt64)) * 8)); memcpy(dest_current, &tmp_buffer, to_write); dest_current += to_write; diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 1d9267a8518..0d571d6ae7c 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -88,7 +88,11 @@ struct Memory : boost::noncopyable, Allocator } else { - size_t new_capacity = align(new_size + pad_right, alignment); + size_t new_capacity = align(new_size, alignment) + pad_right; + + size_t diff = new_capacity - m_capacity; + ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, diff); + m_data = static_cast(Allocator::realloc(m_data, m_capacity, new_capacity, alignment)); m_capacity = new_capacity; m_size = m_capacity - pad_right; @@ -101,6 +105,9 @@ private: if (!alignment) return value; + if (!(value % alignment)) + return value; + return (value + alignment - 1) / alignment * alignment; } @@ -112,12 +119,10 @@ private: return; } - size_t padded_capacity = m_capacity + pad_right; - ProfileEvents::increment(ProfileEvents::IOBufferAllocs); - ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, padded_capacity); + ProfileEvents::increment(ProfileEvents::IOBufferAllocBytes, m_capacity); - size_t new_capacity = align(padded_capacity, alignment); + size_t new_capacity = align(m_capacity, alignment) + pad_right; m_data = static_cast(Allocator::alloc(new_capacity, alignment)); m_capacity = new_capacity; m_size = m_capacity - pad_right; diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 7118de844f2..c00eea98ff4 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -121,7 +121,7 @@ struct Progress /** Callback to track the progress of the query. - * Used in IBlockInputStream and Context. + * Used in QueryPipeline and Context. * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. 
*/ diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index ffa050b71c8..5fe0fda88cd 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1120,7 +1121,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf) } } -void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current) +void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current) { assert(current >= in.position()); assert(current <= in.buffer().end()); @@ -1140,7 +1141,7 @@ void saveUpToPosition(ReadBuffer & in, DB::Memory<> & memory, char * current) in.position() = current; } -bool loadAtPosition(ReadBuffer & in, DB::Memory<> & memory, char * & current) +bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current) { assert(current <= in.buffer().end()); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ca6affbf907..fda8c213ebf 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -29,7 +30,6 @@ #include #include #include -#include #include #include @@ -41,6 +41,9 @@ static constexpr auto DEFAULT_MAX_STRING_SIZE = 1_GiB; namespace DB { +template +struct Memory; + namespace ErrorCodes { extern const int CANNOT_PARSE_DATE; @@ -276,29 +279,39 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) { case '+': { - if (has_sign || has_number) + /// 123+ or +123+, just stop after 123 or +123. + if (has_number) + goto end; + + /// No digits read yet, but we already read sign, like ++, -+. + if (has_sign) { if constexpr (throw_exception) throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters or intermediate sign character", + "Cannot parse number with multiple sign (+/-) characters", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } + has_sign = true; break; } case '-': { - if (has_sign || has_number) + if (has_number) + goto end; + + if (has_sign) { if constexpr (throw_exception) throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters or intermediate sign character", + "Cannot parse number with multiple sign (+/-) characters", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } + if constexpr (is_signed_v) negative = true; else @@ -1290,7 +1303,7 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf); /** This function just copies the data from buffer's internal position (in.position()) * to current position (from arguments) into memory. */ -void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current); +void saveUpToPosition(ReadBuffer & in, Memory> & memory, char * current); /** This function is negative to eof(). * In fact it returns whether the data was loaded to internal ReadBuffers's buffer or not. @@ -1299,7 +1312,7 @@ void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current); * of our buffer and the current cursor in the end of the buffer. When we call eof() it calls next(). * And this function can fill the buffer with new data, so we will lose the data from previous buffer state. 
*/ -bool loadAtPosition(ReadBuffer & in, Memory<> & memory, char * & current); +bool loadAtPosition(ReadBuffer & in, Memory> & memory, char * & current); struct PcgDeserializer diff --git a/src/IO/ThreadPoolReader.cpp b/src/IO/ThreadPoolReader.cpp index 514075569f6..701fa759848 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/IO/ThreadPoolReader.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -151,6 +152,7 @@ std::future ThreadPoolReader::submit(Request reques else { bytes_read += res; + __msan_unpoison(request.buf, res); } } diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 98d33db3021..5d7483c45c0 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -120,7 +120,12 @@ private: void visit(const ASTTableIdentifier & identifier, ASTPtr & ast) const { if (!identifier.compound()) - ast = std::make_shared(database_name, identifier.name()); + { + auto qualified_identifier = std::make_shared(database_name, identifier.name()); + if (!identifier.alias.empty()) + qualified_identifier->setAlias(identifier.alias); + ast = qualified_identifier; + } } void visit(ASTSubquery & subquery, ASTPtr &) const diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 63e3577af55..4f4b981b44d 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -9,8 +9,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 85ce83868c6..3c53769e128 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -19,8 +19,7 @@ #include #include -#include -#include +#include #include @@ -44,8 +43,6 @@ namespace ErrorCodes extern const int UNKNOWN_AGGREGATED_DATA_VARIANT; } -class IBlockOutputStream; - /** Different data structures that can be used for aggregation * For efficiency, the aggregation data itself is put into the pool. * Data and pool ownership (states of aggregate functions) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index b4ce9f352a2..cc04c5f013f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include #include @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 95b279fd59b..0db07267231 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 98acc786aa9..44d0f837413 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -268,6 +267,9 @@ struct ContextSharedPart bool shutdown_called = false; + /// Has background executors for MergeTree tables been initialized? 
+ bool is_background_executors_initialized = false; + Stopwatch uptime_watch; Context::ApplicationType application_type = Context::ApplicationType::SERVER; @@ -1807,6 +1809,68 @@ zkutil::ZooKeeperPtr Context::getZooKeeper() const return shared->zookeeper; } +namespace +{ + +bool checkZooKeeperConfigIsLocal(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) +{ + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_name, keys); + + for (const auto & key : keys) + { + if (startsWith(key, "node")) + { + String host = config.getString(config_name + "." + key + ".host"); + if (isLocalAddress(DNSResolver::instance().resolveHost(host))) + return true; + } + } + return false; +} + +} + + +bool Context::tryCheckClientConnectionToMyKeeperCluster() const +{ + try + { + /// If our server is part of main Keeper cluster + if (checkZooKeeperConfigIsLocal(getConfigRef(), "zookeeper")) + { + LOG_DEBUG(shared->log, "Keeper server is participant of the main zookeeper cluster, will try to connect to it"); + getZooKeeper(); + /// Connected, return true + return true; + } + else + { + Poco::Util::AbstractConfiguration::Keys keys; + getConfigRef().keys("auxiliary_zookeepers", keys); + + /// If our server is part of some auxiliary_zookeeper + for (const auto & aux_zk_name : keys) + { + if (checkZooKeeperConfigIsLocal(getConfigRef(), "auxiliary_zookeepers." + aux_zk_name)) + { + LOG_DEBUG(shared->log, "Our Keeper server is participant of the auxiliary zookeeper cluster ({}), will try to connect to it", aux_zk_name); + getAuxiliaryZooKeeper(aux_zk_name); + /// Connected, return true + return true; + } + } + } + + /// Our server doesn't depend on our Keeper cluster + return true; + } + catch (...) + { + return false; + } +} + UInt32 Context::getZooKeeperSessionUptime() const { std::lock_guard lock(shared->zookeeper_mutex); @@ -1834,19 +1898,33 @@ void Context::setSystemZooKeeperLogAfterInitializationIfNeeded() zk.second->setZooKeeperLog(shared->system_logs->zookeeper_log); } -void Context::initializeKeeperDispatcher() const +void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const { #if USE_NURAFT std::lock_guard lock(shared->keeper_storage_dispatcher_mutex); + if (shared->keeper_storage_dispatcher) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); const auto & config = getConfigRef(); if (config.has("keeper_server")) { + bool is_standalone_app = getApplicationType() == ApplicationType::KEEPER; + if (start_async) + { + assert(!is_standalone_app); + LOG_INFO(shared->log, "Connected to ZooKeeper (or Keeper) before internal Keeper start or we don't depend on our Keeper cluster" + ", will wait for Keeper asynchronously"); + } + else + { + LOG_INFO(shared->log, "Cannot connect to ZooKeeper (or Keeper) before internal Keeper start," + "will wait for Keeper synchronously"); + } + shared->keeper_storage_dispatcher = std::make_shared(); - shared->keeper_storage_dispatcher->initialize(config, getApplicationType() == ApplicationType::KEEPER); + shared->keeper_storage_dispatcher->initialize(config, is_standalone_app, start_async); } #endif } @@ -2590,6 +2668,7 @@ void Context::setFormatSchemaPath(const String & path) Context::SampleBlockCache & Context::getSampleBlockCache() const { + assert(hasQueryContext()); return getQueryContext()->sample_block_cache; } @@ -2895,8 +2974,12 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } -void Context::initializeBackgroundExecutors() 
+void Context::initializeBackgroundExecutorsIfNeeded() { + auto lock = getLock(); + if (shared->is_background_executors_initialized) + return; + const size_t max_merges_and_mutations = getSettingsRef().background_pool_size * getSettingsRef().background_merges_mutations_concurrency_ratio; /// With this executor we can execute more tasks than threads we have @@ -2943,6 +3026,8 @@ void Context::initializeBackgroundExecutors() LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", getSettingsRef().background_common_pool_size, getSettingsRef().background_common_pool_size); + + shared->is_background_executors_initialized = true; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 247dbc74f22..8518275e529 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -14,6 +13,7 @@ #include #include #include +#include #include #if !defined(ARCADIA_BUILD) @@ -636,13 +636,13 @@ public: const Settings & getSettingsRef() const { return settings; } void setProgressCallback(ProgressCallback callback); - /// Used in InterpreterSelectQuery to pass it to the IBlockInputStream. + /// Used in executeQuery() to pass it to the QueryPipeline. ProgressCallback getProgressCallback() const; void setFileProgressCallback(FileProgressCallback && callback) { file_progress_callback = callback; } FileProgressCallback getFileProgressCallback() const { return file_progress_callback; } - /** Set in executeQuery and InterpreterSelectQuery. Then it is used in IBlockInputStream, + /** Set in executeQuery and InterpreterSelectQuery. Then it is used in QueryPipeline, * to update and monitor information about the total number of resources spent for the query. */ void setProcessListElement(QueryStatus * elem); @@ -665,12 +665,18 @@ public: /// Same as above but return a zookeeper connection from auxiliary_zookeepers configuration entry. std::shared_ptr getAuxiliaryZooKeeper(const String & name) const; + /// Try to connect to Keeper using get(Auxiliary)ZooKeeper. Useful for + /// internal Keeper start (check connection to some other node). Return true + /// if connected successfully (without exception) or our zookeeper client + /// connection configured for some other cluster without our node. + bool tryCheckClientConnectionToMyKeeperCluster() const; + UInt32 getZooKeeperSessionUptime() const; #if USE_NURAFT std::shared_ptr & getKeeperDispatcher() const; #endif - void initializeKeeperDispatcher() const; + void initializeKeeperDispatcher(bool start_async) const; void shutdownKeeperDispatcher() const; /// Set auxiliary zookeepers configuration at server starting or configuration reloading. 
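The Context hunks above rename initializeBackgroundExecutors() to initializeBackgroundExecutorsIfNeeded() and guard the body with a flag taken under the context lock, so calling it from several code paths is safe. A reduced sketch of that idempotent-initialization shape (the class and member names are illustrative, not the real Context):

#include <iostream>
#include <mutex>

class ExecutorHolder
{
public:
    // Safe to call from several places: only the first call does the work.
    void initializeBackgroundExecutorsIfNeeded()
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (is_background_executors_initialized)
            return;

        // ... create the background executors here (merge/mutate, common, etc.) ...
        std::cout << "executors initialized\n";

        is_background_executors_initialized = true;
    }

private:
    std::mutex mutex;
    bool is_background_executors_initialized = false;
};

int main()
{
    ExecutorHolder holder;
    holder.initializeBackgroundExecutorsIfNeeded(); // does the work
    holder.initializeBackgroundExecutorsIfNeeded(); // no-op
}

With the flag in place, a second call simply returns instead of re-creating the executors configured in the hunk.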
@@ -861,7 +867,7 @@ public: void setReadTaskCallback(ReadTaskCallback && callback); /// Background executors related methods - void initializeBackgroundExecutors(); + void initializeBackgroundExecutorsIfNeeded(); MergeMutateBackgroundExecutorPtr getMergeMutateExecutor() const; OrdinaryBackgroundExecutorPtr getMovesExecutor() const; diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 2b858512b98..2117eec0063 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 89d7624f203..3cb3c1b47ab 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -35,7 +35,6 @@ #include #include -#include #include @@ -891,9 +890,10 @@ static std::unique_ptr buildJoinedPlan( * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. * - this function shows the expression JOIN _data1. + * - JOIN tables will need aliases to correctly resolve USING clause. */ auto interpreter = interpretSubquery( - join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns()); + join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns().ignoreAlias(false)); auto joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); { diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index c785b085a57..b6bb3c5fad5 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 0ba036a6eaf..1d112a7c548 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -23,7 +23,6 @@ #include -#include #include #include diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index f1f1198e7d9..f41f63a6a55 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/IInterpreter.h b/src/Interpreters/IInterpreter.h index 1b4eada3c9f..665a46190fd 100644 --- a/src/Interpreters/IInterpreter.h +++ b/src/Interpreters/IInterpreter.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 90d5da35df8..ace82f47f05 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -270,6 +270,7 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS required_access.emplace_back(AccessType::ALTER_ORDER_BY, database, table); break; } + case ASTAlterCommand::REMOVE_SAMPLE_BY: case ASTAlterCommand::MODIFY_SAMPLE_BY: { required_access.emplace_back(AccessType::ALTER_SAMPLE_BY, database, table); diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index ccb5f4040ec..fe331985aa4 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ 
b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -1,21 +1,23 @@ +#include + +#include + #include #include #include #include #include #include -#include #include #include #include -#include + namespace DB { namespace ErrorCodes { - extern const int UNKNOWN_IDENTIFIER; extern const int CANNOT_CREATE_RECURSIVE_FUNCTION; extern const int UNSUPPORTED_METHOD; } @@ -31,20 +33,32 @@ BlockIO InterpreterCreateFunctionQuery::execute() if (!create_function_query) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Expected CREATE FUNCTION query"); + auto & user_defined_function_factory = UserDefinedSQLFunctionFactory::instance(); + auto & function_name = create_function_query->function_name; + + bool if_not_exists = create_function_query->if_not_exists; + bool replace = create_function_query->or_replace; + + create_function_query->if_not_exists = false; + create_function_query->or_replace = false; + + if (if_not_exists && user_defined_function_factory.tryGet(function_name) != nullptr) + return {}; + validateFunction(create_function_query->function_core, function_name); - UserDefinedSQLFunctionFactory::instance().registerFunction(function_name, query_ptr); + user_defined_function_factory.registerFunction(function_name, query_ptr, replace); - if (!persist_function) + if (persist_function) { try { - UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr); + UserDefinedSQLObjectsLoader::instance().storeObject(current_context, UserDefinedSQLObjectType::Function, function_name, *query_ptr, replace); } catch (Exception & exception) { - UserDefinedSQLFunctionFactory::instance().unregisterFunction(function_name); + user_defined_function_factory.unregisterFunction(function_name); exception.addMessage(fmt::format("while storing user defined function {} on disk", backQuote(function_name))); throw; } @@ -66,42 +80,9 @@ void InterpreterCreateFunctionQuery::validateFunction(ASTPtr function, const Str } ASTPtr function_body = function->as()->children.at(0)->children.at(1); - std::unordered_set identifiers_in_body = getIdentifiers(function_body); - - for (const auto & identifier : identifiers_in_body) - { - if (!arguments.contains(identifier)) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Identifier {} does not exist in arguments", backQuote(identifier)); - } - validateFunctionRecursiveness(function_body, name); } -std::unordered_set InterpreterCreateFunctionQuery::getIdentifiers(ASTPtr node) -{ - std::unordered_set identifiers; - - std::stack ast_nodes_to_process; - ast_nodes_to_process.push(node); - - while (!ast_nodes_to_process.empty()) - { - auto ast_node_to_process = ast_nodes_to_process.top(); - ast_nodes_to_process.pop(); - - for (const auto & child : ast_node_to_process->children) - { - auto identifier_name_opt = tryGetIdentifierName(child); - if (identifier_name_opt) - identifiers.insert(identifier_name_opt.value()); - - ast_nodes_to_process.push(child); - } - } - - return identifiers; -} - void InterpreterCreateFunctionQuery::validateFunctionRecursiveness(ASTPtr node, const String & function_to_create) { for (const auto & child : node->children) diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.h b/src/Interpreters/InterpreterCreateFunctionQuery.h index fdc03b379db..a67fdb9605d 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.h +++ b/src/Interpreters/InterpreterCreateFunctionQuery.h @@ -22,7 +22,6 @@ public: private: static void validateFunction(ASTPtr function, const String & name); - static 
std::unordered_set getIdentifiers(ASTPtr node); static void validateFunctionRecursiveness(ASTPtr node, const String & function_to_create); ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 2ebae17cd6b..5370aee1096 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -128,10 +128,10 @@ BlockIO InterpreterDescribeQuery::execute() { for (const auto & column : columns) { - column.type->forEachSubcolumn([&](const auto & name, const auto & type, const auto & path) + IDataType::forEachSubcolumn([&](const auto & path, const auto & name, const auto & data) { res_columns[0]->insert(Nested::concatenateName(column.name, name)); - res_columns[1]->insert(type->getName()); + res_columns[1]->insert(data.type->getName()); /// It's not trivial to calculate default expression for subcolumn. /// So, leave it empty. @@ -150,7 +150,7 @@ BlockIO InterpreterDescribeQuery::execute() res_columns[6]->insertDefault(); res_columns[7]->insert(1u); - }); + }, column.type->getDefaultSerialization(), column.type, nullptr); } } diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 53cb96b42fe..b788c8f960f 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -18,6 +18,11 @@ BlockIO InterpreterDropFunctionQuery::execute() FunctionNameNormalizer().visit(query_ptr.get()); auto & drop_function_query = query_ptr->as(); + auto & user_defined_functions_factory = UserDefinedSQLFunctionFactory::instance(); + + if (drop_function_query.if_exists && !user_defined_functions_factory.has(drop_function_query.function_name)) + return {}; + UserDefinedSQLFunctionFactory::instance().unregisterFunction(drop_function_query.function_name); UserDefinedSQLObjectsLoader::instance().removeObject(current_context, UserDefinedSQLObjectType::Function, drop_function_query.function_name); diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 6eb188bce9f..24c30a8be30 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -1,8 +1,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index c29eace1b55..3afb1e1fbb0 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 54307ae848b..fcf5f19aef6 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -278,7 +278,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else if (query->as()) { - return std::make_unique(query, context, false /*is_internal*/); + return std::make_unique(query, context, true /*persist_function*/); } else if (query->as()) { diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a36941ea07a..6a1a8652b23 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ 
b/src/Interpreters/InterpreterInsertQuery.cpp @@ -3,8 +3,6 @@ #include #include #include -#include -#include #include #include #include @@ -20,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 0d6fe34c0c2..e5733a8c28b 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -1,7 +1,6 @@ #pragma once -#include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 85cc889319f..df8c539089e 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1,6 +1,3 @@ -#include -#include - #include #include @@ -36,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -46,15 +43,12 @@ #include #include #include -#include #include #include #include -#include +#include #include -#include #include -#include #include #include #include @@ -64,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -158,24 +151,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( ContextPtr context_, const SelectQueryOptions & options_, const Names & required_result_column_names_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, nullptr, options_, required_result_column_names_) + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, nullptr, options_, required_result_column_names_) { } -InterpreterSelectQuery::InterpreterSelectQuery( - const ASTPtr & query_ptr_, - ContextPtr context_, - const BlockInputStreamPtr & input_, - const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, input_, std::nullopt, nullptr, options_.copy().noSubquery()) -{} - InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, Pipe input_pipe_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) + : InterpreterSelectQuery(query_ptr_, context_, std::move(input_pipe_), nullptr, options_.copy().noSubquery()) {} InterpreterSelectQuery::InterpreterSelectQuery( @@ -184,7 +169,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( const StoragePtr & storage_, const StorageMetadataPtr & metadata_snapshot_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) + : InterpreterSelectQuery(query_ptr_, context_, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) {} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -268,7 +253,6 @@ static bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, - const BlockInputStreamPtr & input_, std::optional input_pipe_, const StoragePtr & storage_, const SelectQueryOptions & options_, @@ -277,7 +261,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// NOTE: the query almost always should be cloned because it will be modified during analysis. : IInterpreterUnionOrSelectQuery(options_.modify_inplace ? 
query_ptr_ : query_ptr_->clone(), context_, options_) , storage(storage_) - , input(input_) , input_pipe(std::move(input_pipe_)) , log(&Poco::Logger::get("InterpreterSelectQuery")) , metadata_snapshot(metadata_snapshot_) @@ -286,6 +269,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.ignore_projections = options.ignore_projections; query_info.is_projection_query = options.is_projection_query; + query_info.original_query = query_ptr->clone(); initSettings(); const Settings & settings = context->getSettingsRef(); @@ -294,13 +278,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(), ErrorCodes::TOO_DEEP_SUBQUERIES); - bool has_input = input || input_pipe; - if (input) - { - /// Read from prepared input. - source_header = input->getHeader(); - } - else if (input_pipe) + bool has_input = input_pipe != std::nullopt; + if (input_pipe) { /// Read from prepared input. source_header = input_pipe->getHeader(); @@ -450,17 +429,17 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.only_analyze) { - if (query.sampleSize() && (input || input_pipe || !storage || !storage->supportsSampling())) + if (query.sampleSize() && (input_pipe || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); - if (query.final() && (input || input_pipe || !storage || !storage->supportsFinal())) + if (query.final() && (input_pipe || !storage || !storage->supportsFinal())) throw Exception( - (!input && !input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", + (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", ErrorCodes::ILLEGAL_FINAL); - if (query.prewhere() && (input || input_pipe || !storage || !storage->supportsPrewhere())) + if (query.prewhere() && (input_pipe || !storage || !storage->supportsPrewhere())) throw Exception( - (!input && !input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", + (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", ErrorCodes::ILLEGAL_PREWHERE); /// Save the new temporary tables in the query context @@ -578,7 +557,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( void InterpreterSelectQuery::buildQueryPlan(QueryPlan & query_plan) { - executeImpl(query_plan, input, std::move(input_pipe)); + executeImpl(query_plan, std::move(input_pipe)); /// We must guarantee that result structure is the same as in getSampleBlock() /// @@ -926,7 +905,7 @@ static bool hasWithTotalsInAnySubqueryInFromClause(const ASTSelectQuery & query) } -void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional prepared_pipe) +void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional prepared_pipe) { /** Streams of data. When the query is executed in parallel, we have several data streams. 
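Throughout InterpreterSelectQuery the removed BlockInputStreamPtr input is now represented only by std::optional<Pipe>, so checks like `input || input_pipe` collapse to a single has-value test (`has_input = input_pipe != std::nullopt`). A toy sketch of the pattern, with a stand-in Pipe struct rather than the real class:

#include <iostream>
#include <optional>
#include <string>

struct Pipe                 // stand-in for DB::Pipe
{
    std::string header;
};

struct Interpreter
{
    std::optional<Pipe> input_pipe;

    bool hasPreparedInput() const { return input_pipe.has_value(); }

    std::string sourceHeader() const
    {
        // Previously: if (input) ... else if (input_pipe) ...
        if (input_pipe)
            return input_pipe->header;
        return "read from storage";
    }
};

int main()
{
    Interpreter from_storage;
    Interpreter from_pipe{Pipe{"a UInt64, b String"}};

    std::cout << from_storage.sourceHeader() << '\n'; // read from storage
    std::cout << from_pipe.sourceHeader() << '\n';    // a UInt64, b String
}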
* If there is no GROUP BY, then perform all operations before ORDER BY and LIMIT in parallel, then @@ -1010,13 +989,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu } else { - if (prepared_input) - { - auto prepared_source_step - = std::make_unique(Pipe(std::make_shared(prepared_input)), context); - query_plan.addStep(std::move(prepared_source_step)); - } - else if (prepared_pipe) + if (prepared_pipe) { auto prepared_source_step = std::make_unique(std::move(*prepared_pipe), context); query_plan.addStep(std::move(prepared_source_step)); @@ -1580,7 +1553,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() { if (!expressions.prewhere_info) { - const bool does_storage_support_prewhere = !input && !input_pipe && storage && storage->supportsPrewhere(); + const bool does_storage_support_prewhere = !input_pipe && storage && storage->supportsPrewhere(); if (does_storage_support_prewhere && shouldMoveToPrewhere()) { /// Execute row level filter in prewhere as a part of "move to prewhere" optimization. @@ -2285,35 +2258,20 @@ void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) // happens in case of `over ()`. if (!w.full_sort_description.empty() && (i == 0 || !sortIsPrefix(w, *windows_sorted[i - 1]))) { - auto partial_sorting = std::make_unique( - query_plan.getCurrentDataStream(), - w.full_sort_description, - 0 /* LIMIT */, - SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode)); - partial_sorting->setStepDescription("Sort each block for window '" + w.window_name + "'"); - query_plan.addStep(std::move(partial_sorting)); - auto merge_sorting_step = std::make_unique( + auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), w.full_sort_description, settings.max_block_size, 0 /* LIMIT */, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), settings.max_bytes_before_remerge_sort, settings.remerge_sort_lowered_memory_bytes_ratio, settings.max_bytes_before_external_sort, context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); - merge_sorting_step->setStepDescription("Merge sorted blocks for window '" + w.window_name + "'"); - query_plan.addStep(std::move(merge_sorting_step)); - - // First MergeSorted, now MergingSorted. 
- auto merging_sorted = std::make_unique( - query_plan.getCurrentDataStream(), - w.full_sort_description, - settings.max_block_size, - 0 /* LIMIT */); - merging_sorted->setStepDescription("Merge sorted streams for window '" + w.window_name + "'"); - query_plan.addStep(std::move(merging_sorted)); + sorting_step->setStepDescription("Sorting for window '" + w.window_name + "'"); + query_plan.addStep(std::move(sorting_step)); } auto window_step = std::make_unique(query_plan.getCurrentDataStream(), w, w.window_functions); @@ -2328,14 +2286,12 @@ void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, Input { const Settings & settings = context->getSettingsRef(); - const auto & query = getSelectQuery(); - auto finish_sorting_step = std::make_unique( + auto finish_sorting_step = std::make_unique( query_plan.getCurrentDataStream(), input_sorting_info->order_key_prefix_descr, output_order_descr, settings.max_block_size, - limit, - query.hasFiltration()); + limit); query_plan.addStep(std::move(finish_sorting_step)); } @@ -2360,32 +2316,21 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo const Settings & settings = context->getSettingsRef(); - auto partial_sorting = std::make_unique( - query_plan.getCurrentDataStream(), - output_order_descr, - limit, - SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode)); - - partial_sorting->setStepDescription("Sort each block for ORDER BY"); - query_plan.addStep(std::move(partial_sorting)); - /// Merge the sorted blocks. - auto merge_sorting_step = std::make_unique( + auto sorting_step = std::make_unique( query_plan.getCurrentDataStream(), output_order_descr, settings.max_block_size, limit, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode), settings.max_bytes_before_remerge_sort, settings.remerge_sort_lowered_memory_bytes_ratio, settings.max_bytes_before_external_sort, context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); - merge_sorting_step->setStepDescription("Merge sorted blocks for ORDER BY"); - query_plan.addStep(std::move(merge_sorting_step)); - - /// If there are several streams, we merge them into one - executeMergeSorted(query_plan, output_order_descr, limit, "for ORDER BY"); + sorting_step->setStepDescription("Sorting for ORDER BY"); + query_plan.addStep(std::move(sorting_step)); } @@ -2403,7 +2348,7 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const So const Settings & settings = context->getSettingsRef(); auto merging_sorted - = std::make_unique(query_plan.getCurrentDataStream(), sort_description, settings.max_block_size, limit); + = std::make_unique(query_plan.getCurrentDataStream(), sort_description, settings.max_block_size, limit); merging_sorted->setStepDescription("Merge sorted streams " + description); query_plan.addStep(std::move(merging_sorted)); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 99c95a8d624..cf24d14b737 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -3,7 +3,6 @@ #include #include -#include #include #include #include @@ -52,13 +51,6 @@ public: const SelectQueryOptions &, const Names & required_result_column_names_ = Names{}); - /// Read data not from the table specified in the query, but from the prepared source `input`. 
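executeWindow() and executeOrder() above replace the former PartialSorting, MergeSorting and MergingSorted steps with one SortingStep; conceptually the work is still "sort each block, then merge the sorted blocks". A toy two-phase full sort over plain vectors, only to illustrate that shape (nothing here is ClickHouse API):

#include <algorithm>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

// Phase 1: sort every block independently (what PartialSorting did).
// Phase 2: k-way merge of the sorted blocks (what MergeSorting/MergingSorted did).
// A single sorting step can own both phases, which is the shape of the new code.
std::vector<int> fullSort(std::vector<std::vector<int>> blocks)
{
    for (auto & block : blocks)
        std::sort(block.begin(), block.end());

    using Item = std::pair<int, size_t>; // value, block index
    auto cmp = [](const Item & a, const Item & b) { return a.first > b.first; };
    std::priority_queue<Item, std::vector<Item>, decltype(cmp)> heap(cmp);

    std::vector<size_t> positions(blocks.size(), 0);
    for (size_t i = 0; i < blocks.size(); ++i)
        if (!blocks[i].empty())
            heap.push({blocks[i][0], i});

    std::vector<int> result;
    while (!heap.empty())
    {
        auto [value, block] = heap.top();
        heap.pop();
        result.push_back(value);
        if (++positions[block] < blocks[block].size())
            heap.push({blocks[block][positions[block]], block});
    }
    return result;
}

int main()
{
    for (int x : fullSort({{5, 1, 9}, {4, 4, 2}, {8, 0}}))
        std::cout << x << ' ';   // 0 1 2 4 4 5 8 9
    std::cout << '\n';
}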
- InterpreterSelectQuery( - const ASTPtr & query_ptr_, - ContextPtr context_, - const BlockInputStreamPtr & input_, - const SelectQueryOptions & = {}); - /// Read data not from the table specified in the query, but from the prepared pipe `input`. InterpreterSelectQuery( const ASTPtr & query_ptr_, @@ -108,7 +100,6 @@ private: InterpreterSelectQuery( const ASTPtr & query_ptr_, ContextPtr context_, - const BlockInputStreamPtr & input_, std::optional input_pipe, const StoragePtr & storage_, const SelectQueryOptions &, @@ -122,7 +113,7 @@ private: Block getSampleBlockImpl(); - void executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional prepared_pipe); + void executeImpl(QueryPlan & query_plan, std::optional prepared_pipe); /// Different stages of query execution. @@ -198,7 +189,6 @@ private: TableLockHolder table_lock; /// Used when we read from prepared input, not table or subquery. - BlockInputStreamPtr input; std::optional input_pipe; Poco::Logger * log; diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 4aeaa9e4f13..e7ea08e557d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -222,6 +222,14 @@ InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery) { + if (!context_->hasQueryContext()) + { + if (is_subquery) + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + else + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); + } + auto & cache = context_->getSampleBlockCache(); /// Using query string because query_ptr changes for every internal SELECT auto key = queryToString(query_ptr_); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index adf1aae3ff3..30a417f6fa7 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -2,8 +2,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index bc0aeda56bd..e5e447562c6 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -15,8 +15,7 @@ limitations under the License. */ #include #include #include -#include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterWatchQuery.h b/src/Interpreters/InterpreterWatchQuery.h index 2bc7236582a..ac167182a71 100644 --- a/src/Interpreters/InterpreterWatchQuery.h +++ b/src/Interpreters/InterpreterWatchQuery.h @@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include #include #include @@ -38,8 +38,6 @@ private: /// Table from where to read data, if not subquery. 
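InterpreterSelectWithUnionQuery::getSampleBlock() above gains a fallback for contexts without a query context: with no per-query cache available, the header is computed directly instead of going through getSampleBlockCache(), which now asserts hasQueryContext(). A reduced cache-or-compute sketch, using a string payload instead of a Block and a null pointer to mean "no cache" (names illustrative):

#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

using SampleBlock = std::string;                       // stand-in for DB::Block
using SampleBlockCache = std::unordered_map<std::string, SampleBlock>;

SampleBlock getSampleBlock(
    const std::string & query,
    SampleBlockCache * cache,                          // nullptr ~ "no query context"
    const std::function<SampleBlock(const std::string &)> & analyze)
{
    if (!cache)
        return analyze(query);                         // nowhere to cache, just compute

    auto [it, inserted] = cache->emplace(query, SampleBlock{});
    if (inserted)
        it->second = analyze(query);                   // computed once per query text
    return it->second;
}

int main()
{
    auto analyze = [](const std::string & q) { return "header of (" + q + ")"; };

    SampleBlockCache cache;
    std::cout << getSampleBlock("SELECT 1", &cache, analyze) << '\n';
    std::cout << getSampleBlock("SELECT 1", &cache, analyze) << '\n';  // cache hit
    std::cout << getSampleBlock("SELECT 2", nullptr, analyze) << '\n'; // no cache
}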
StoragePtr storage; - /// Streams of read data - BlockInputStreams streams; }; } diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index aaa7441b8a4..30115710e22 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 3cd076f91bb..ed20b1b2048 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -18,7 +18,7 @@ #include #include #include - +#include namespace DB { @@ -524,7 +524,8 @@ std::vector normalizeColumnNamesExtractNeeded( size_t count = countTablesWithColumn(tables, short_name); - if (count > 1 || aliases.count(short_name)) + /// isValidIdentifierBegin retuired to be consistent with TableJoin::deduplicateAndQualifyColumnNames + if (count > 1 || aliases.count(short_name) || !isValidIdentifierBegin(short_name.at(0))) { const auto & table = tables[*table_pos]; IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 9cfc43b92c5..7fb9f1a3ceb 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -4,8 +4,7 @@ #include #include -#include -#include +#include #include #include #include @@ -13,9 +12,9 @@ #include #include #include -#include +#include #include -#include +#include namespace DB @@ -592,9 +591,10 @@ void MergeJoin::mergeInMemoryRightBlocks() builder.getHeader(), right_sort_description, max_rows_in_right_block, 0, 0, 0, 0, nullptr, 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - auto sorted_input = PipelineExecutingBlockInputStream(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); - while (Block block = sorted_input.read()) + Block block; + while (executor.pull(block)) { if (!block.rows()) continue; diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 8782a2f7535..0e2e771255d 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 2c12c4a6879..471ad67d4e7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -6,17 +6,17 @@ #include #include #include +#include #include #include #include #include #include -#include +#include #include #include #include #include -#include #include #include #include @@ -54,24 +54,33 @@ public: { ContextPtr context; std::optional nondeterministic_function_name; + bool subquery = false; }; - static bool needChildVisit(const ASTPtr & /*node*/, const ASTPtr & child) + static bool needChildVisit(const ASTPtr & /*node*/, const ASTPtr & /*child*/) { - return child != nullptr; + return true; } static void visit(const ASTPtr & node, Data & data) { - if (data.nondeterministic_function_name) + if (data.nondeterministic_function_name || data.subquery) return; - if (const auto * function = typeid_cast(node.get())) + if (node->as()) + { + /// We cannot determine if subquery is deterministic or not, + /// so we do not allow to use subqueries in mutation without allow_nondeterministic_mutations=1 + data.subquery = true; + } + else if (const auto * function = 
typeid_cast(node.get())) { /// Property of being deterministic for lambda expression is completely determined /// by the contents of its definition, so we just proceed to it. if (function->name != "lambda") { + /// NOTE It may be an aggregate function, so get(...) may throw. + /// However, an aggregate function can be used only in subquery and we do not go into subquery. const auto func = FunctionFactory::instance().get(function->name, data.context); if (!func->isDeterministic()) data.nondeterministic_function_name = func->getName(); @@ -81,10 +90,11 @@ public: }; using FirstNonDeterministicFunctionFinder = InDepthNodeVisitor; +using FirstNonDeterministicFunctionData = FirstNonDeterministicFunctionMatcher::Data; -std::optional findFirstNonDeterministicFunctionName(const MutationCommand & command, ContextPtr context) +FirstNonDeterministicFunctionData findFirstNonDeterministicFunctionName(const MutationCommand & command, ContextPtr context) { - FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt}; + FirstNonDeterministicFunctionMatcher::Data finder_data{context, std::nullopt, false}; switch (command.type) { @@ -94,7 +104,7 @@ std::optional findFirstNonDeterministicFunctionName(const MutationComman FirstNonDeterministicFunctionFinder(finder_data).visit(update_assignments_ast); if (finder_data.nondeterministic_function_name) - return finder_data.nondeterministic_function_name; + return finder_data; /// Currently UPDATE and DELETE both always have predicates so we can use fallthrough [[fallthrough]]; @@ -105,7 +115,7 @@ std::optional findFirstNonDeterministicFunctionName(const MutationComman auto predicate_ast = command.predicate->clone(); FirstNonDeterministicFunctionFinder(finder_data).visit(predicate_ast); - return finder_data.nondeterministic_function_name; + return finder_data; } default: @@ -775,8 +785,12 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & for (const String & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); - auto syntax_result = TreeRewriter(context).analyze(all_asts, all_columns, storage, metadata_snapshot); - if (context->hasQueryContext()) + /// Executing scalar subquery on that stage can lead to deadlock + /// e.g. ALTER referencing the same table in scalar subquery + bool execute_scalar_subqueries = !dry_run; + auto syntax_result = TreeRewriter(context).analyze( + all_asts, all_columns, storage, metadata_snapshot, false, true, execute_scalar_subqueries); + if (execute_scalar_subqueries && context->hasQueryContext()) for (const auto & it : syntax_result->getScalars()) context->getQueryContext()->addScalar(it.first, it.second); @@ -918,12 +932,15 @@ void MutationsInterpreter::validate() { for (const auto & command : commands) { - const auto nondeterministic_func_name = findFirstNonDeterministicFunctionName(command, context); - if (nondeterministic_func_name) - throw Exception( - "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions! 
" - "Function '" + *nondeterministic_func_name + "' is non-deterministic", - ErrorCodes::BAD_ARGUMENTS); + const auto nondeterministic_func_data = findFirstNonDeterministicFunctionName(command, context); + if (nondeterministic_func_data.subquery) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER UPDATE/ALTER DELETE statement with subquery may be nondeterministic, " + "see allow_nondeterministic_mutations setting"); + + if (nondeterministic_func_data.nondeterministic_function_name) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER UPDATE/ALTER DELETE statements must use only deterministic functions. " + "Function '{}' is non-deterministic", *nondeterministic_func_data.nondeterministic_function_name); } } diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 5a77ebb1dfe..81afa990d3b 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index e0a52772da7..02be24bb2bd 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 4386c294316..472efc109fb 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -11,6 +11,11 @@ namespace ProfileEvents { +std::shared_ptr TypeEnum = std::make_shared(DB::DataTypeEnum8::Values{ + { "increment", static_cast(INCREMENT)}, + { "gauge", static_cast(GAUGE)}, +}); + /// Put implementation here to avoid extra linking dependencies for clickhouse_common_io void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only) { diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 699c997d904..8a92eadec79 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include @@ -9,4 +10,13 @@ namespace ProfileEvents /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +/// This is for ProfileEvents packets. +enum Type : int8_t +{ + INCREMENT = 1, + GAUGE = 2, +}; + +extern std::shared_ptr TypeEnum; + } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 98d0bcb88ac..865c9551219 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,12 +4,15 @@ #include #include #include +#include #include #include #include #include #include +#include + #include #include #include @@ -240,19 +243,23 @@ void Session::shutdownNamedSessions() NamedSessionsStorage::instance().shutdown(); } - Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_) : session_id(UUIDHelpers::generateV4()), - global_context(global_context_) + global_context(global_context_), + log(&Poco::Logger::get(String{magic_enum::enum_name(interface_)} + "-Session")) { prepared_client_info.emplace(); prepared_client_info->interface = interface_; } -Session::Session(Session &&) = default; - Session::~Session() { + LOG_DEBUG(log, "{} Destroying {} of user {}", + toString(session_id), + (named_session ? "named session '" + named_session->key.second + "'" : "unnamed session"), + (user_id ? 
toString(*user_id) : "") + ); + /// Early release a NamedSessionData. if (named_session) named_session->release(); @@ -298,12 +305,18 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) address = Poco::Net::SocketAddress{"127.0.0.1", 0}; + LOG_DEBUG(log, "{} Authenticating user '{}' from {}", + toString(session_id), credentials_.getUserName(), address.toString()); + try { user_id = global_context->getAccessControlManager().login(credentials_, address.host()); + LOG_DEBUG(log, "{} Authenticated with global context as user {}", + toString(session_id), user_id ? toString(*user_id) : ""); } catch (const Exception & e) { + LOG_DEBUG(log, "{} Authentication failed with error: {}", toString(session_id), e.what()); if (auto session_log = getSessionLog()) session_log->addLoginFailure(session_id, *prepared_client_info, credentials_.getUserName(), e); throw; @@ -336,6 +349,8 @@ ContextMutablePtr Session::makeSessionContext() if (query_context_created) throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + LOG_DEBUG(log, "{} Creating session context with user_id: {}", + toString(session_id), user_id ? toString(*user_id) : ""); /// Make a new session context. ContextMutablePtr new_session_context; new_session_context = Context::createCopy(global_context); @@ -364,6 +379,9 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: if (query_context_created) throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", + toString(session_id), session_name_, user_id ? toString(*user_id) : ""); + /// Make a new session context OR /// if the `session_id` and `user_id` were used before then just get a previously created session context. std::shared_ptr new_named_session; @@ -420,6 +438,12 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t ContextMutablePtr query_context = Context::createCopy(from_session_context ? session_context : global_context); query_context->makeQueryContext(); + LOG_DEBUG(log, "{} Creating query context from {} context, user_id: {}, parent context user: {}", + toString(session_id), + from_session_context ? "session" : "global", + user_id ? toString(*user_id) : "", + query_context->getUser() ? query_context->getUser()->getName() : ""); + /// Copy the specified client info to the new query context. 
auto & res_client_info = query_context->getClientInfo(); if (client_info_to_move) @@ -460,7 +484,7 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t if (!notified_session_log_about_login) { - if (auto session_log = getSessionLog(); session_log && user) + if (auto session_log = getSessionLog(); user && user_id && session_log) { session_log->addLoginSuccess( session_id, diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 494b7750f1e..b62327103e9 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -33,9 +33,10 @@ public: static void shutdownNamedSessions(); Session(const ContextPtr & global_context_, ClientInfo::Interface interface_); - Session(Session &&); ~Session(); + Session(const Session &&) = delete; + Session& operator=(const Session &&) = delete; Session(const Session &) = delete; Session& operator=(const Session &) = delete; @@ -90,6 +91,8 @@ private: std::shared_ptr named_session; bool named_session_created = false; + + Poco::Logger * log = nullptr; }; } diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 118779f1935..3146b6af03f 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index ebe4aba71ab..1945824636f 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -1,11 +1,11 @@ #include #include -#include +#include #include #include #include -#include -#include +#include +#include #include diff --git a/src/Interpreters/SortedBlocksWriter.h b/src/Interpreters/SortedBlocksWriter.h index c65511e943e..0262a274c68 100644 --- a/src/Interpreters/SortedBlocksWriter.h +++ b/src/Interpreters/SortedBlocksWriter.h @@ -6,9 +6,8 @@ #include #include #include -#include -#include -#include +#include +#include namespace DB diff --git a/src/DataStreams/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp similarity index 98% rename from src/DataStreams/SquashingTransform.cpp rename to src/Interpreters/SquashingTransform.cpp index ea99dc49780..54d9a1db25e 100644 --- a/src/DataStreams/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,4 @@ -#include -#include +#include namespace DB diff --git a/src/DataStreams/SquashingTransform.h b/src/Interpreters/SquashingTransform.h similarity index 100% rename from src/DataStreams/SquashingTransform.h rename to src/Interpreters/SquashingTransform.h diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 22cd958f4f8..7cd53442ffd 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -7,8 +7,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 9bcddb6b982..e0968b7fce4 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1100,7 +1100,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( ConstStoragePtr storage, const StorageMetadataPtr & metadata_snapshot, bool allow_aggregations, - bool allow_self_aliases) const + bool allow_self_aliases, + bool execute_scalar_subqueries) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -1112,7 +1113,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( normalize(query, result.aliases, 
result.source_columns_set, false, settings, allow_self_aliases); /// Executing scalar subqueries. Column defaults could be a scalar subquery. - executeScalarSubqueries(query, getContext(), 0, result.scalars, false); + executeScalarSubqueries(query, getContext(), 0, result.scalars, !execute_scalar_subqueries); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 98201f35216..52c62cc4cec 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -110,7 +110,8 @@ public: ConstStoragePtr storage = {}, const StorageMetadataPtr & metadata_snapshot = {}, bool allow_aggregations = false, - bool allow_self_aliases = true) const; + bool allow_self_aliases = true, + bool execute_scalar_subqueries = true) const; /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( diff --git a/src/Interpreters/UserDefinedExecutableFunction.cpp b/src/Interpreters/UserDefinedExecutableFunction.cpp index 06830df68e6..d57978d0fd6 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.cpp +++ b/src/Interpreters/UserDefinedExecutableFunction.cpp @@ -4,8 +4,8 @@ #include #include -#include -#include +#include +#include namespace DB diff --git a/src/Interpreters/UserDefinedExecutableFunction.h b/src/Interpreters/UserDefinedExecutableFunction.h index 240422a02ca..1cb1de47578 100644 --- a/src/Interpreters/UserDefinedExecutableFunction.h +++ b/src/Interpreters/UserDefinedExecutableFunction.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index d6ad2666ff1..cfa1171a84b 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -2,8 +2,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp index 434f5523b42..f036741ca21 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.cpp @@ -19,7 +19,7 @@ UserDefinedSQLFunctionFactory & UserDefinedSQLFunctionFactory::instance() return result; } -void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query) +void UserDefinedSQLFunctionFactory::registerFunction(const String & function_name, ASTPtr create_function_query, bool replace) { if (FunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The function '{}' already exists", function_name); @@ -29,11 +29,17 @@ void UserDefinedSQLFunctionFactory::registerFunction(const String & function_nam std::lock_guard lock(mutex); - auto [_, inserted] = function_name_to_create_query.emplace(function_name, std::move(create_function_query)); + auto [it, inserted] = function_name_to_create_query.emplace(function_name, create_function_query); + if (!inserted) - throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, - "The function name '{}' is not unique", - function_name); + { + if (replace) + it->second = std::move(create_function_query); + else + throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, + "The function name '{}' is not unique", + function_name); + } } void UserDefinedSQLFunctionFactory::unregisterFunction(const String & function_name) @@ -77,6 +83,11 @@ 
ASTPtr UserDefinedSQLFunctionFactory::tryGet(const std::string & function_name) return it->second; } +bool UserDefinedSQLFunctionFactory::has(const String & function_name) const +{ + return tryGet(function_name) != nullptr; +} + std::vector UserDefinedSQLFunctionFactory::getAllRegisteredNames() const { std::vector registered_names; diff --git a/src/Interpreters/UserDefinedSQLFunctionFactory.h b/src/Interpreters/UserDefinedSQLFunctionFactory.h index 366e27e833d..6487b951705 100644 --- a/src/Interpreters/UserDefinedSQLFunctionFactory.h +++ b/src/Interpreters/UserDefinedSQLFunctionFactory.h @@ -10,19 +10,31 @@ namespace DB { +/// Factory for SQLUserDefinedFunctions class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory> { public: static UserDefinedSQLFunctionFactory & instance(); - void registerFunction(const String & function_name, ASTPtr create_function_query); + /** Register function for function_name in factory for specified create_function_query. + * If replace = true and function with function_name already exists replace it with create_function_query. + * Otherwise throws exception. + */ + void registerFunction(const String & function_name, ASTPtr create_function_query, bool replace); + /// Unregister function for function_name void unregisterFunction(const String & function_name); + /// Get function create query for function_name. If no function registered with function_name throws exception. ASTPtr get(const String & function_name) const; + /// Get function create query for function_name. If no function registered with function_name return nullptr. ASTPtr tryGet(const String & function_name) const; + /// Check if function with function_name registered. + bool has(const String & function_name) const; + + /// Get all user defined functions registered names. 
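The UserDefinedSQLFunctionFactory changes above add OR REPLACE semantics to registerFunction() and a has() helper used by CREATE ... IF NOT EXISTS and DROP ... IF EXISTS. A minimal registry with the same shape, storing the CREATE statement text instead of an ASTPtr (names illustrative):

#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <unordered_map>

class FunctionRegistry
{
public:
    void registerFunction(const std::string & name, std::string create_query, bool replace)
    {
        std::lock_guard<std::mutex> lock(mutex);
        auto [it, inserted] = functions.emplace(name, create_query);
        if (!inserted)
        {
            if (replace)
                it->second = std::move(create_query);   // CREATE OR REPLACE
            else
                throw std::runtime_error("The function name '" + name + "' is not unique");
        }
    }

    bool has(const std::string & name) const
    {
        std::lock_guard<std::mutex> lock(mutex);
        return functions.count(name) != 0;              // backs IF NOT EXISTS / IF EXISTS
    }

private:
    mutable std::mutex mutex;
    std::unordered_map<std::string, std::string> functions;
};

int main()
{
    FunctionRegistry registry;
    registry.registerFunction("linear", "CREATE FUNCTION linear AS (x) -> 2 * x + 1", false);
    registry.registerFunction("linear", "CREATE FUNCTION linear AS (x) -> 3 * x", true);
    std::cout << registry.has("linear") << ' ' << registry.has("missing") << '\n'; // 1 0
}

The UserDefinedSQLObjectsLoader::storeObject() hunk further below applies the same replace flag on disk, overwriting the stored definition instead of throwing OBJECT_ALREADY_STORED_ON_DISK.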
std::vector getAllRegisteredNames() const override; private: diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index cc5db020387..d9ac53097ab 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -18,15 +18,19 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) +void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data & data) { auto * function = ast->as(); if (!function) return; auto result = tryToReplaceFunction(*function); + if (result) + { ast = result; + visit(ast, data); + } } bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr &) @@ -83,9 +87,16 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f if (identifier_name_opt) { auto function_argument_it = identifier_name_to_function_argument.find(*identifier_name_opt); - assert(function_argument_it != identifier_name_to_function_argument.end()); + if (function_argument_it == identifier_name_to_function_argument.end()) + continue; + + auto child_alias = child->tryGetAlias(); child = function_argument_it->second->clone(); + + if (!child_alias.empty()) + child->setAlias(child_alias); + continue; } diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp index e4eb97f3002..a71f1f0799c 100644 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.cpp +++ b/src/Interpreters/UserDefinedSQLObjectsLoader.cpp @@ -69,7 +69,7 @@ void UserDefinedSQLObjectsLoader::loadUserDefinedObject(ContextPtr context, User 0, context->getSettingsRef().max_parser_depth); - InterpreterCreateFunctionQuery interpreter(ast, context, true /*is internal*/); + InterpreterCreateFunctionQuery interpreter(ast, context, false /*persist_function*/); interpreter.execute(); } } @@ -111,7 +111,7 @@ void UserDefinedSQLObjectsLoader::loadObjects(ContextPtr context) } } -void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast) +void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace) { if (unlikely(!enable_persistence)) return; @@ -127,7 +127,7 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL } } - if (std::filesystem::exists(file_path)) + if (!replace && std::filesystem::exists(file_path)) throw Exception(ErrorCodes::OBJECT_ALREADY_STORED_ON_DISK, "User defined object {} already stored on disk", backQuote(file_path)); LOG_DEBUG(log, "Storing object {} to file {}", backQuote(object_name), file_path); @@ -135,9 +135,9 @@ void UserDefinedSQLObjectsLoader::storeObject(ContextPtr context, UserDefinedSQL WriteBufferFromOwnString create_statement_buf; formatAST(ast, create_statement_buf, false); writeChar('\n', create_statement_buf); - String create_statement = create_statement_buf.str(); - WriteBufferFromFile out(file_path, create_statement.size(), O_WRONLY | O_CREAT | O_EXCL); + + WriteBufferFromFile out(file_path, create_statement.size()); writeString(create_statement, out); out.next(); if (context->getSettingsRef().fsync_metadata) diff --git a/src/Interpreters/UserDefinedSQLObjectsLoader.h b/src/Interpreters/UserDefinedSQLObjectsLoader.h index 17493933f21..2e747f67a8d 100644 --- a/src/Interpreters/UserDefinedSQLObjectsLoader.h +++ 
b/src/Interpreters/UserDefinedSQLObjectsLoader.h @@ -21,7 +21,7 @@ public: UserDefinedSQLObjectsLoader(); void loadObjects(ContextPtr context); - void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast); + void storeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name, const IAST & ast, bool replace); void removeObject(ContextPtr context, UserDefinedSQLObjectType object_type, const String & object_name); /// For ClickHouse local if path is not set we can disable loader. diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 57bb4aa4905..a91cb1fbeb2 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -203,6 +203,12 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID return src; } + if (which_type.isDate32() && src.getType() == Field::Types::Int64) + { + /// We don't need any conversion Int64 is under type of Date32 + return src; + } + if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64) { /// Already in needed type. @@ -210,7 +216,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } if (which_type.isDateTime64() - && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDateTime() || which_from_type.isDateTime64())) + && (which_from_type.isNativeInt() || which_from_type.isNativeUInt() || which_from_type.isDate() || which_from_type.isDate32() || which_from_type.isDateTime() || which_from_type.isDateTime64())) { const auto scale = static_cast(type).getScale(); const auto decimal_value = DecimalUtils::decimalFromComponents(src.reinterpret(), 0, scale); diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 45e1c580f01..c3b8cc5c677 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include diff --git a/src/Interpreters/executeDDLQueryOnCluster.h b/src/Interpreters/executeDDLQueryOnCluster.h index f430c2364b2..e7ec52d03cb 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.h +++ b/src/Interpreters/executeDDLQueryOnCluster.h @@ -1,10 +1,9 @@ #pragma once -#include +#include #include #include #include -#include namespace zkutil diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 3f5b386d16f..95fb8d38454 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -9,9 +9,7 @@ #include #include -#include -#include -#include +#include #include #include @@ -51,7 +49,6 @@ #include #include -#include #include #include diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index a2df9baec73..9c561d8b88c 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index dec925d68c1..bf5d30437ec 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -4,8 +4,6 @@ #include #include -#include - #include #include #include diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 6a3db48e835..65b2065b2ad 100644 --- a/src/Interpreters/loadMetadata.cpp +++ 
b/src/Interpreters/loadMetadata.cpp @@ -161,7 +161,7 @@ void loadMetadata(ContextMutablePtr context, const String & default_database_nam bool create_default_db_if_not_exists = !default_database_name.empty(); bool metadata_dir_for_default_db_already_exists = databases.count(default_database_name); if (create_default_db_if_not_exists && !metadata_dir_for_default_db_already_exists) - databases.emplace(default_database_name, path + "/" + escapeForFileName(default_database_name)); + databases.emplace(default_database_name, std::filesystem::path(path) / escapeForFileName(default_database_name)); TablesLoader::Databases loaded_databases; for (const auto & [name, db_path] : databases) diff --git a/src/NOTICE b/src/NOTICE index d0d3efe3f8e..c68280b1529 100644 --- a/src/NOTICE +++ b/src/NOTICE @@ -18,9 +18,6 @@ Common/UInt128.h Core/Block.h Core/Defines.h Core/Settings.h -DataStreams/PushingToViewsBlockOutputStream.cpp -DataStreams/PushingToViewsBlockOutputStream.h -DataStreams/copyData.cpp Databases/DatabasesCommon.cpp IO/WriteBufferValidUTF8.cpp Interpreters/InterpreterAlterQuery.cpp diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 53d53bf5ae1..d3153952114 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -157,6 +157,10 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & settings.ostr << (settings.hilite ? hilite_keyword : "") << "MODIFY SAMPLE BY " << (settings.hilite ? hilite_none : ""); sample_by->formatImpl(settings, state, frame); } + else if (type == ASTAlterCommand::REMOVE_SAMPLE_BY) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << "REMOVE SAMPLE BY" << (settings.hilite ? hilite_none : ""); + } else if (type == ASTAlterCommand::ADD_INDEX) { settings.ostr << (settings.hilite ? hilite_keyword : "") << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 9b40586e09f..3e0d9219549 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -41,6 +41,7 @@ public: RESET_SETTING, MODIFY_QUERY, REMOVE_TTL, + REMOVE_SAMPLE_BY, ADD_INDEX, DROP_INDEX, diff --git a/src/Parsers/ASTCreateFunctionQuery.cpp b/src/Parsers/ASTCreateFunctionQuery.cpp index 0b3991ddc44..4e1e7de660d 100644 --- a/src/Parsers/ASTCreateFunctionQuery.cpp +++ b/src/Parsers/ASTCreateFunctionQuery.cpp @@ -12,7 +12,18 @@ ASTPtr ASTCreateFunctionQuery::clone() const void ASTCreateFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState & state, IAST::FormatStateStacked frame) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE FUNCTION " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "CREATE "; + + if (or_replace) + settings.ostr << "OR REPLACE "; + + settings.ostr << "FUNCTION "; + + if (if_not_exists) + settings.ostr << "IF NOT EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_keyword : "") << " AS " << (settings.hilite ? 
hilite_none : ""); function_core->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTCreateFunctionQuery.h b/src/Parsers/ASTCreateFunctionQuery.h index 3adddad8fbd..a58fe64c435 100644 --- a/src/Parsers/ASTCreateFunctionQuery.h +++ b/src/Parsers/ASTCreateFunctionQuery.h @@ -12,6 +12,9 @@ public: String function_name; ASTPtr function_core; + bool or_replace = false; + bool if_not_exists = false; + String getID(char) const override { return "CreateFunctionQuery"; } ASTPtr clone() const override; diff --git a/src/Parsers/ASTDropFunctionQuery.cpp b/src/Parsers/ASTDropFunctionQuery.cpp index 5800a7ba9cb..47665aa52f9 100644 --- a/src/Parsers/ASTDropFunctionQuery.cpp +++ b/src/Parsers/ASTDropFunctionQuery.cpp @@ -12,7 +12,12 @@ ASTPtr ASTDropFunctionQuery::clone() const void ASTDropFunctionQuery::formatImpl(const IAST::FormatSettings & settings, IAST::FormatState &, IAST::FormatStateStacked) const { - settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION " << (settings.hilite ? hilite_none : ""); + settings.ostr << (settings.hilite ? hilite_keyword : "") << "DROP FUNCTION "; + + if (if_exists) + settings.ostr << "IF EXISTS "; + + settings.ostr << (settings.hilite ? hilite_none : ""); settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(function_name) << (settings.hilite ? hilite_none : ""); } diff --git a/src/Parsers/ASTDropFunctionQuery.h b/src/Parsers/ASTDropFunctionQuery.h index e32bf93a64d..a9d70a3016f 100644 --- a/src/Parsers/ASTDropFunctionQuery.h +++ b/src/Parsers/ASTDropFunctionQuery.h @@ -10,6 +10,8 @@ class ASTDropFunctionQuery : public IAST public: String function_name; + bool if_exists = false; + String getID(char) const override { return "DropFunctionQuery"; } ASTPtr clone() const override; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 3c78c4060d6..e1a62dc9dce 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -294,8 +294,12 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format continue; } + if (frame.need_parens) + settings.ostr << '('; arguments->formatImpl(settings, state, nested_need_parens); settings.ostr << (settings.hilite ? hilite_operator : "") << func[1] << (settings.hilite ? hilite_none : ""); + if (frame.need_parens) + settings.ostr << ')'; written = true; @@ -440,6 +444,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (!written && 0 == strcmp(name.c_str(), "lambda")) { + /// Special case: zero elements tuple in lhs of lambda is printed as (). /// Special case: one-element tuple in lhs of lambda is printed as its element. 
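/// ---------------------------------------------------------------------------
/// Editorial illustration (not part of the patch), assuming the surrounding
/// formatting code that prints the lambda's left-hand side, then " -> ", then
/// its body:
///
///     lambda(tuple(x), x + 1)   is printed as   x -> x + 1
///     lambda(tuple(), 42)       is printed as   () -> 42    (the new zero-element case)
/// ---------------------------------------------------------------------------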
if (frame.need_parens) @@ -449,9 +454,12 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format if (first_arg_func && first_arg_func->name == "tuple" && first_arg_func->arguments - && first_arg_func->arguments->children.size() == 1) + && (first_arg_func->arguments->children.size() == 1 || first_arg_func->arguments->children.empty())) { - first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); + if (first_arg_func->arguments->children.size() == 1) + first_arg_func->arguments->children[0]->formatImpl(settings, state, nested_need_parens); + else + settings.ostr << "()"; } else arguments->children[0]->formatImpl(settings, state, nested_need_parens); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 2eade2079da..1ea64d94fe7 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -104,6 +104,7 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected ParserKeyword s_ttl("TTL"); ParserKeyword s_remove_ttl("REMOVE TTL"); + ParserKeyword s_remove_sample_by("REMOVE SAMPLE BY"); ParserCompoundIdentifier parser_name; ParserStringLiteral parser_string_literal; @@ -669,6 +670,10 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MODIFY_SAMPLE_BY; } + else if (s_remove_sample_by.ignore(pos, expected)) + { + command->type = ASTAlterCommand::REMOVE_SAMPLE_BY; + } else if (s_delete.ignore(pos, expected)) { if (s_in_partition.ignore(pos, expected)) diff --git a/src/Parsers/ParserCreateFunctionQuery.cpp b/src/Parsers/ParserCreateFunctionQuery.cpp index fbfd02415e7..5d84b6bc2dc 100644 --- a/src/Parsers/ParserCreateFunctionQuery.cpp +++ b/src/Parsers/ParserCreateFunctionQuery.cpp @@ -1,10 +1,12 @@ +#include + #include #include #include #include #include #include -#include + namespace DB { @@ -13,6 +15,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp { ParserKeyword s_create("CREATE"); ParserKeyword s_function("FUNCTION"); + ParserKeyword s_or_replace("OR REPLACE"); + ParserKeyword s_if_not_exists("IF NOT EXISTS"); ParserIdentifier function_name_p; ParserKeyword s_as("AS"); ParserLambdaExpression lambda_p; @@ -20,12 +24,21 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp ASTPtr function_name; ASTPtr function_core; + bool or_replace = false; + bool if_not_exists = false; + if (!s_create.ignore(pos, expected)) return false; + if (s_or_replace.ignore(pos, expected)) + or_replace = true; + if (!s_function.ignore(pos, expected)) return false; + if (!or_replace && s_if_not_exists.ignore(pos, expected)) + if_not_exists = true; + if (!function_name_p.parse(pos, function_name, expected)) return false; @@ -40,6 +53,8 @@ bool ParserCreateFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Exp create_function_query->function_name = function_name->as().name(); create_function_query->function_core = function_core; + create_function_query->or_replace = or_replace; + create_function_query->if_not_exists = if_not_exists; return true; } diff --git a/src/Parsers/ParserDropFunctionQuery.cpp b/src/Parsers/ParserDropFunctionQuery.cpp index 04d26109836..d8c86646410 100644 --- a/src/Parsers/ParserDropFunctionQuery.cpp +++ b/src/Parsers/ParserDropFunctionQuery.cpp @@ -11,7 +11,10 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec { ParserKeyword s_drop("DROP"); ParserKeyword s_function("FUNCTION"); + 
ParserKeyword s_if_exists("IF EXISTS"); + ParserIdentifier function_name_p; + bool if_exists = false; ASTPtr function_name; @@ -21,10 +24,14 @@ bool ParserDropFunctionQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expec if (!s_function.ignore(pos, expected)) return false; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + if (!function_name_p.parse(pos, function_name, expected)) return false; auto drop_function_query = std::make_shared(); + drop_function_query->if_exists = if_exists; node = drop_function_query; drop_function_query->function_name = function_name->as().name(); diff --git a/src/Parsers/fuzzers/CMakeLists.txt b/src/Parsers/fuzzers/CMakeLists.txt index 0dd541e663f..bb52101c847 100644 --- a/src/Parsers/fuzzers/CMakeLists.txt +++ b/src/Parsers/fuzzers/CMakeLists.txt @@ -6,3 +6,6 @@ target_link_libraries(select_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZ add_executable(create_parser_fuzzer create_parser_fuzzer.cpp ${SRCS}) target_link_libraries(create_parser_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE}) + +add_subdirectory(codegen_fuzzer) + diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt new file mode 100644 index 00000000000..6f6b0d07661 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -0,0 +1,48 @@ +find_package(Protobuf REQUIRED) + +set (CURRENT_DIR_IN_SOURCES "${ClickHouse_SOURCE_DIR}/src/Parsers/fuzzers/codegen_fuzzer") +set (CURRENT_DIR_IN_BINARY "${ClickHouse_BINARY_DIR}/src/Parsers/fuzzers/codegen_fuzzer") + +# Copy scripts and template file to build directory to generate .proto and .cpp file from them +configure_file( + "${CURRENT_DIR_IN_SOURCES}/gen.py" + "${CURRENT_DIR_IN_BINARY}/gen.py" + COPYONLY) +configure_file( + "${CURRENT_DIR_IN_SOURCES}/update.sh" + "${CURRENT_DIR_IN_BINARY}/update.sh" + COPYONLY) + +configure_file( + "${CURRENT_DIR_IN_SOURCES}/clickhouse-template.g" + "${CURRENT_DIR_IN_BINARY}/clickhouse-template.g" + COPYONLY) + +# Note that it depends on all.dict file! 
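# ---------------------------------------------------------------------------
# Editorial note (not part of the patch): the generation pipeline wired up by
# the custom commands below is
#   clickhouse-template.g + all.dict --(update.sh)--> clickhouse.g --(gen.py)--> out.cpp + out.proto;
# PROTOBUF_GENERATE_CPP then compiles out.proto, and the results are linked
# into codegen_select_fuzzer together with libprotobuf-mutator.
# ---------------------------------------------------------------------------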
+add_custom_command( + OUTPUT + "${CURRENT_DIR_IN_BINARY}/clickhouse.g" + COMMAND ./update.sh "${ClickHouse_SOURCE_DIR}/tests/fuzz/all.dict" +) + +add_custom_command( + OUTPUT + "${CURRENT_DIR_IN_BINARY}/out.cpp" + "${CURRENT_DIR_IN_BINARY}/out.proto" + COMMAND python3 gen.py clickhouse.g out.cpp out.proto + DEPENDS "${CURRENT_DIR_IN_BINARY}/clickhouse.g" +) + +PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS "${CURRENT_DIR_IN_BINARY}/out.proto") +set(FUZZER_SRCS codegen_select_fuzzer.cpp "${CURRENT_DIR_IN_BINARY}/out.cpp" ${PROTO_SRCS} ${PROTO_HDRS}) + +set(CMAKE_INCLUDE_CURRENT_DIR TRUE) + +add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) + +set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") + +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${Protobuf_INCLUDE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}") +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}") +target_include_directories(codegen_select_fuzzer BEFORE PRIVATE "${LibProtobufMutator_SOURCE_DIR}/src") +target_link_libraries(codegen_select_fuzzer PRIVATE protobuf-mutator dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g new file mode 100644 index 00000000000..79fd775b1da --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse-template.g @@ -0,0 +1,121 @@ +" "; +" "; +" "; +";"; + + +"(" $1 ")"; +"(" $1 ", " $2 ")"; +"(" $1 ", " $2 ", " $3 ")"; + +$1 ", " $2 ; +$1 ", " $2 ", " $3 ; +$1 ", " $2 ", " $3 ", " $4 ; +$1 ", " $2 ", " $3 ", " $4 ", " $5 ; + +"[" $1 ", " $2 "]"; +"[" $1 ", " $2 ", " $3 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; + +$0 "(" $1 ")"; +$0 "(" $1 ", " $2 ")"; +$0 "(" $1 ", " $2 ", " $3 ")"; + +$1 " as " $2 ; + + +// TODO: add more clickhouse specific stuff +"SELECT " $1 " FROM " $2 " WHERE " $3 ; +"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; +"SELECT " $1 " FROM " $2 " SORT BY " $3 ; +"SELECT " $1 " FROM " $2 " LIMIT " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 ; +"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; +"SELECT " $1 " INTO OUTFILE " $2 ; + +"WITH " $1 " AS " $2 ; + +"{" $1 ":" $2 "}"; +"[" $1 "," $2 "]"; +"[]"; + + +" x "; +"x"; +" `x` "; +"`x`"; + +" \"value\" "; +"\"value\""; +" 0 "; +"0"; +"1"; +"2"; +"123123123123123123"; +"182374019873401982734091873420923123123123123123"; +"1e-1"; +"1.1"; +"\"\""; +" '../../../../../../../../../etc/passwd' "; + +"/"; +"="; +"=="; +"!="; +"<>"; +"<"; +"<="; +">"; +">="; +"<<"; +"|<<"; +"&"; +"|"; +"||"; +"<|"; +"|>"; +"+"; +"-"; +"~"; +"*"; +"/"; +"\\"; +"%"; +""; +"."; +","; +","; +","; +","; +","; +","; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"?"; +":"; +"@"; +"@@"; +"$"; +"\""; +"`"; +"{"; +"}"; +"^"; +"::"; +"->"; +"]"; +"["; + diff --git a/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp new file mode 100644 index 00000000000..9310d7d59f7 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/codegen_select_fuzzer.cpp @@ -0,0 +1,40 @@ + +#include +#include + +#include +#include +#include +#include + +#include + +#include "out.pb.h" + +void GenerateSentence(const Sentence&, std::string &, int); + + +DEFINE_BINARY_PROTO_FUZZER(const Sentence& main) +{ + static std::string input; + 
input.reserve(4096); + + GenerateSentence(main, input, 0); + if (input.size()) + { + std::cout << input << std::endl; + + DB::ParserQueryWithOutput parser(input.data() + input.size()); + try + { + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + + DB::WriteBufferFromOStream out(std::cerr, 4096); + DB::formatAST(*ast, out); + std::cerr << std::endl; + } + catch (...) {} + + input.clear(); + } +} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/gen.py b/src/Parsers/fuzzers/codegen_fuzzer/gen.py new file mode 100644 index 00000000000..95936247489 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/gen.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 + +import sys +import string + + +TOKEN_TEXT = 1 +TOKEN_VAR = 2 + +TOKEN_COLON = ':' +TOKEN_SEMI = ';' +TOKEN_OR = '|' +TOKEN_QUESTIONMARK = '?' +TOKEN_ROUND_BRACKET_OPEN = '(' +TOKEN_ROUND_BRACKET_CLOSE = ')' +TOKEN_ASTERISK = '*' +TOKEN_SLASH = '/' + + + + +class TextValue: + def __init__(self, t): + self.t = t + self.slug = None + + def get_slug(self): + if self.slug is not None: + return self.slug + slug = '' + for c in self.t: + slug += c if c in string.ascii_letters else '_' + self.slug = slug + return slug + + def get_name(self): + return f"TextValue_{self.get_slug()}" + + def __repr__(self): + return f"TextValue(\"{self.t}\")" + + +class Var: + def __init__(self, id_): + self.id_ = id_ + + def __repr__(self): + return f"Var({self.id_})" + + +class Parser: + def __init__(self): + self.chains = [] + self.text = None + self.col = 0 + self.line = 1 + self.t = None + self.var_id = -1 + self.cur_tok = None + self.includes = [] + + self.proto = '' + self.cpp = '' + + def parse_file(self, filename): + with open(filename) as f: + self.text = f.read() + + while self.parse_statement() is not None: + pass + + def add_include(self, filename): + self.includes.append(filename) + + def get_next_token(self): + self.skip_ws() + + if not len(self.text): + return None + + if self.text[0] == '"': + return self.parse_txt_value() + + if self.text[0] == '$': + return self.parse_var_value() + + c, self.text = self.text[0], self.text[1:] + self.cur_tok = c + return c + + def parse_var_value(self): + i = self.text.find(' ') + + id_, self.text = self.text[1:i], self.text[i+1:] + self.var_id = int(id_) + self.cur_tok = TOKEN_VAR + return TOKEN_VAR + + def parse_txt_value(self): + if self.text[0] != '"': + raise Exception("parse_txt_value: expected quote at the start") + + self.t = '' + self.text = self.text[1:] + + while self.text[0] != '"': + if self.text[0] == '\\': + if self.text[1] == 'x': + self.t += self.text[:4] + self.text = self.text[4:] + elif self.text[1] in 'nt\\"': + self.t += self.text[:2] + self.text = self.text[2:] + else: + raise Exception(f"parse_txt_value: unknown symbol {self.text[0]}") + else: + c, self.text = self.text[0], self.text[1:] + self.t += c + + self.text = self.text[1:] + self.cur_tok = TOKEN_TEXT + return TOKEN_TEXT + + def skip_ws(self): + while self.text and self.text[0] in string.whitespace: + if self.text[0] == '\n': + self.line += 1 + self.col = 0 + self.text = self.text[1:] + self.col += 1 + if not self.text: + return None + return True + + def skip_line(self): + self.line += 1 + index = self.text.find('\n') + self.text = self.text[index:] + + + def parse_statement(self): + if self.skip_ws() is None: + return None + + self.get_next_token() + if self.cur_tok == TOKEN_SLASH: + self.skip_line() + return TOKEN_SLASH + + chain = [] + while self.cur_tok != TOKEN_SEMI: + if self.cur_tok == 
TOKEN_TEXT: + chain.append(TextValue(self.t)) + elif self.cur_tok == TOKEN_VAR: + chain.append(Var(self.var_id)) + else: + self.fatal_parsing_error(f"unexpected token {self.cur_tok}") + self.get_next_token() + + if not chain: + self.fatal_parsing_error("empty chains are not allowed") + self.chains.append(chain) + return True + + def generate(self): + self.proto = 'syntax = "proto3";\n\n' + self.cpp = '#include \n#include \n#include \n\n#include \n\n' + + for incl_file in self.includes: + self.cpp += f'#include "{incl_file}"\n' + self.cpp += '\n' + + self.proto += 'message Word {\n' + self.proto += '\tenum Value {\n' + + self.cpp += 'void GenerateWord(const Word&, std::string&, int);\n\n' + + self.cpp += 'void GenerateSentence(const Sentence& stc, std::string &s, int depth) {\n' + self.cpp += '\tfor (int i = 0; i < stc.words_size(); i++ ) {\n' + self.cpp += '\t\tGenerateWord(stc.words(i), s, ++depth);\n' + self.cpp += '\t}\n' + self.cpp += '}\n' + + self.cpp += 'void GenerateWord(const Word& word, std::string &s, int depth) {\n' + + self.cpp += '\tif (depth > 5) return;\n\n' + self.cpp += '\tswitch (word.value()) {\n' + + for idx, chain in enumerate(self.chains): + self.proto += f'\t\tvalue_{idx} = {idx};\n' + + self.cpp += f'\t\tcase {idx}: {{\n' + num_var = 0 + for item in chain: + if isinstance(item, TextValue): + self.cpp += f'\t\t\ts += "{item.t}";\n' + elif isinstance(item, Var): + self.cpp += f'\t\t\tif (word.inner().words_size() > {num_var})\t\t\t\tGenerateWord(word.inner().words({num_var}), s, ++depth);\n' + num_var += 1 + else: + raise Exception("unknown token met during generation") + self.cpp += '\t\t\tbreak;\n\t\t}\n' + self.cpp += '\t\tdefault: break;\n' + + self.cpp += '\t}\n' + + self.proto += '\t}\n' + self.proto += '\tValue value = 1;\n' + self.proto += '\tSentence inner = 2;\n' + self.proto += '}\nmessage Sentence {\n\trepeated Word words = 1;\n}' + + self.cpp += '}\n' + return self.cpp, self.proto + + def fatal_parsing_error(self, msg): + print(f"Line: {self.line}, Col: {self.col}") + raise Exception(f"fatal error during parsing. {msg}") + + +def main(args): + input_file, outfile_cpp, outfile_proto = args + + if not outfile_proto.endswith('.proto'): + raise Exception("outfile_proto (argv[3]) should end with `.proto`") + + include_filename = outfile_proto[:-6] + ".pb.h" + + p = Parser() + p.add_include(include_filename) + p.parse_file(input_file) + + cpp, proto = p.generate() + + proto = proto.replace('\t', ' ' * 4) + cpp = cpp.replace('\t', ' ' * 4) + + with open(outfile_cpp, 'w') as f: + f.write(cpp) + + with open(outfile_proto, 'w') as f: + f.write(proto) + + +if __name__ == '__main__': + if len(sys.argv) < 3: + print(f"Usage {sys.argv[0]} ") + sys.exit(1) + main(sys.argv[1:]) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/update.sh b/src/Parsers/fuzzers/codegen_fuzzer/update.sh new file mode 100755 index 00000000000..daee56dcea1 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/update.sh @@ -0,0 +1,30 @@ +#!/bin/bash + + +_main() { + local dict_filename="${1}" + if [[ $# -ne 1 ]]; + then + echo "Usage: $0 "; + exit 1; + fi + + if [[ ! 
-f $dict_filename ]]; + then + echo "File $dict_filename doesn't exist"; + exit 1 + fi + + cat clickhouse-template.g > clickhouse.g + + while read line; + do + [[ -z "$line" ]] && continue + echo $line | sed -e '/^#/d' -e 's/"\(.*\)"/" \1 ";/g' + done < $dict_filename >> clickhouse.g +} + +_main "$@" + +# Sample run: ./update.sh ${CLICKHOUSE_SOURCE_DIR}/tests/fuzz/all.dict +# then run `python ./gen.py clickhouse.g out.cpp out.proto` to generate new files with tokens. Rebuild fuzzer diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index eb0bf5281c9..c0b57d9b1f5 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -38,7 +38,8 @@ const std::unordered_set keywords "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY" + "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET", + "TRIM", "LTRIM", "RTRIM", "BOTH", "LEADING", "TRAILING" }; const std::unordered_set keep_words @@ -906,7 +907,13 @@ void obfuscateQueries( /// Write quotes and the obfuscated content inside. result.write(*token.begin); - obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + + /// If it is long, just replace it with hash. Long identifiers in queries are usually auto-generated. + if (token.size() > 32) + writeIntText(sipHash64(token.begin + 1, token.size() - 2), result); + else + obfuscateIdentifier({token.begin + 1, token.size() - 2}, result, obfuscate_map, used_nouns, hash_func); + result.write(token.end[-1]); } else if (token.type == TokenType::Number) diff --git a/src/Processors/Executors/CompletedPipelineExecutor.cpp b/src/Processors/Executors/CompletedPipelineExecutor.cpp index be388386e9d..a4e3dea89fa 100644 --- a/src/Processors/Executors/CompletedPipelineExecutor.cpp +++ b/src/Processors/Executors/CompletedPipelineExecutor.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp b/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp deleted file mode 100644 index bdfbbc2874e..00000000000 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -PipelineExecutingBlockInputStream::PipelineExecutingBlockInputStream(QueryPipeline pipeline_) - : pipeline(std::make_unique(std::move(pipeline_))) -{ -} - -PipelineExecutingBlockInputStream::~PipelineExecutingBlockInputStream() = default; - -Block PipelineExecutingBlockInputStream::getHeader() const -{ - if (executor) - return executor->getHeader(); - - if (async_executor) - return async_executor->getHeader(); - - return pipeline->getHeader(); -} - -void PipelineExecutingBlockInputStream::createExecutor() -{ - if (pipeline->getNumThreads() > 1) - async_executor = std::make_unique(*pipeline); - else - executor = std::make_unique(*pipeline); - - is_execution_started = true; -} - -void PipelineExecutingBlockInputStream::readPrefixImpl() -{ - createExecutor(); -} - -Block PipelineExecutingBlockInputStream::readImpl() -{ - if 
(!is_execution_started) - createExecutor(); - - Block block; - bool can_continue = true; - while (can_continue) - { - if (executor) - can_continue = executor->pull(block); - else - can_continue = async_executor->pull(block); - - if (block) - return block; - } - - totals = executor ? executor->getTotalsBlock() - : async_executor->getTotalsBlock(); - - extremes = executor ? executor->getExtremesBlock() - : async_executor->getExtremesBlock(); - - return {}; -} - -inline static void throwIfExecutionStarted(bool is_execution_started, const char * method) -{ - if (is_execution_started) - throw Exception(String("Cannot call ") + method + - " for PipelineExecutingBlockInputStream because execution was started", - ErrorCodes::LOGICAL_ERROR); -} - -void PipelineExecutingBlockInputStream::cancel(bool kill) -{ - IBlockInputStream::cancel(kill); - - if (is_execution_started) - { - executor ? executor->cancel() - : async_executor->cancel(); - } -} - -void PipelineExecutingBlockInputStream::setProgressCallback(const ProgressCallback & callback) -{ - throwIfExecutionStarted(is_execution_started, "setProgressCallback"); - pipeline->setProgressCallback(callback); -} - -void PipelineExecutingBlockInputStream::setProcessListElement(QueryStatus * elem) -{ - throwIfExecutionStarted(is_execution_started, "setProcessListElement"); - IBlockInputStream::setProcessListElement(elem); - pipeline->setProcessListElement(elem); -} - -void PipelineExecutingBlockInputStream::setLimits(const StreamLocalLimits & limits_) -{ - throwIfExecutionStarted(is_execution_started, "setLimits"); - - if (limits_.mode == LimitsMode::LIMITS_TOTAL) - throw Exception("Total limits are not supported by PipelineExecutingBlockInputStream", - ErrorCodes::LOGICAL_ERROR); - - /// Local limits may be checked by IBlockInputStream itself. - IBlockInputStream::setLimits(limits_); -} - -void PipelineExecutingBlockInputStream::setQuota(const std::shared_ptr &) -{ - throw Exception("Quota is not supported by PipelineExecutingBlockInputStream", - ErrorCodes::LOGICAL_ERROR); -} - -} diff --git a/src/Processors/Executors/PipelineExecutingBlockInputStream.h b/src/Processors/Executors/PipelineExecutingBlockInputStream.h deleted file mode 100644 index 68497938ad4..00000000000 --- a/src/Processors/Executors/PipelineExecutingBlockInputStream.h +++ /dev/null @@ -1,44 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class QueryPipeline; -class PullingAsyncPipelineExecutor; -class PullingPipelineExecutor; - -/// Implement IBlockInputStream from QueryPipeline. -/// It's a temporary wrapper. -class PipelineExecutingBlockInputStream : public IBlockInputStream -{ -public: - explicit PipelineExecutingBlockInputStream(QueryPipeline pipeline_); - ~PipelineExecutingBlockInputStream() override; - - String getName() const override { return "PipelineExecuting"; } - Block getHeader() const override; - - void cancel(bool kill) override; - - /// Implement IBlockInputStream methods via QueryPipeline. - void setProgressCallback(const ProgressCallback & callback) final; - void setProcessListElement(QueryStatus * elem) final; - void setLimits(const StreamLocalLimits & limits_) final; - void setQuota(const std::shared_ptr & quota_) final; - -protected: - void readPrefixImpl() override; - Block readImpl() override; - -private: - std::unique_ptr pipeline; - /// One of executors is used. - std::unique_ptr executor; /// for single thread. - std::unique_ptr async_executor; /// for many threads. 
- bool is_execution_started = false; - - void createExecutor(); -}; - -} diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 0616a392027..ec07cee8738 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 819344e4225..fdddfdef2a4 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -225,12 +225,12 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() return header.cloneWithColumns(extremes.detachColumns()); } -BlockStreamProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() +ProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { if (lazy_format) return lazy_format->getProfileInfo(); - static BlockStreamProfileInfo profile_info; + static ProfileInfo profile_info; static std::once_flag flag; /// Calculate rows before limit here to avoid race. std::call_once(flag, []() { profile_info.getRowsBeforeLimit(); }); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.h b/src/Processors/Executors/PullingAsyncPipelineExecutor.h index 2ce75aecab7..7e45246ffd6 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.h +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.h @@ -8,7 +8,7 @@ class QueryPipeline; class Block; class Chunk; class LazyOutputFormat; -struct BlockStreamProfileInfo; +struct ProfileInfo; /// Asynchronous pulling executor for QueryPipeline. /// Always creates extra thread. If query is executed in single thread, use PullingPipelineExecutor. @@ -44,7 +44,7 @@ public: Block getExtremesBlock(); /// Get query profile info. - BlockStreamProfileInfo & getProfileInfo(); + ProfileInfo & getProfileInfo(); /// Internal executor data. struct Data; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 7da2a6d3059..a9c73b9f8fb 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include @@ -118,7 +118,7 @@ Block PullingPipelineExecutor::getExtremesBlock() return header.cloneWithColumns(extremes.detachColumns()); } -BlockStreamProfileInfo & PullingPipelineExecutor::getProfileInfo() +ProfileInfo & PullingPipelineExecutor::getProfileInfo() { return pulling_format->getProfileInfo(); } diff --git a/src/Processors/Executors/PullingPipelineExecutor.h b/src/Processors/Executors/PullingPipelineExecutor.h index 878d66bd3d4..e05f4f3738d 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.h +++ b/src/Processors/Executors/PullingPipelineExecutor.h @@ -10,7 +10,7 @@ class Chunk; class QueryPipeline; class PipelineExecutor; class PullingOutputFormat; -struct BlockStreamProfileInfo; +struct ProfileInfo; using PipelineExecutorPtr = std::shared_ptr; @@ -46,7 +46,7 @@ public: Block getExtremesBlock(); /// Get query profile info. 
- BlockStreamProfileInfo & getProfileInfo(); + ProfileInfo & getProfileInfo(); private: std::atomic_bool has_data_flag = false; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index a1a0755f952..0b6d5334716 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 2e2b5e9ca1e..c1e851e3425 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index b647338d6fb..ba4dcee6f70 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -72,7 +72,8 @@ public: InputPort & getPort(PortKind kind) { return *std::next(inputs.begin(), kind); } - /// Compatible to IBlockOutputStream interface + /// Compatibility with old interface. + /// TODO: separate formats and processors. void write(const Block & block); diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index 19a94d41044..87caadd93da 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include class Stopwatch; diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8ccc04faf35..4beb260b64a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -1,4 +1,5 @@ #include +#include #include #include diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp index fd4b2870bea..4d000bb1f35 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.cpp @@ -1,7 +1,6 @@ #include "CapnProtoRowInputFormat.h" #if USE_CAPNP -#include #include #include #include @@ -9,198 +8,40 @@ #include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace DB { namespace ErrorCodes { - extern const int BAD_TYPE_OF_FIELD; - extern const int THERE_IS_NO_COLUMN; extern const int LOGICAL_ERROR; } -static CapnProtoRowInputFormat::NestedField split(const Block & header, size_t i) -{ - CapnProtoRowInputFormat::NestedField field = {{}, i}; - - // Remove leading dot in field definition, e.g. ".msg" -> "msg" - String name(header.safeGetByPosition(i).name); - if (!name.empty() && name[0] == '.') - name.erase(0, 1); - - splitInto<'.', '_'>(field.tokens, name); - return field; -} - - -static Field convertNodeToField(const capnp::DynamicValue::Reader & value) -{ - switch (value.getType()) - { - case capnp::DynamicValue::UNKNOWN: - throw Exception("Unknown field type", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::VOID: - return Field(); - case capnp::DynamicValue::BOOL: - return value.as() ? 
1u : 0u; - case capnp::DynamicValue::INT: - return value.as(); - case capnp::DynamicValue::UINT: - return value.as(); - case capnp::DynamicValue::FLOAT: - return value.as(); - case capnp::DynamicValue::TEXT: - { - auto arr = value.as(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::DATA: - { - auto arr = value.as().asChars(); - return String(arr.begin(), arr.size()); - } - case capnp::DynamicValue::LIST: - { - auto list_value = value.as(); - Array res(list_value.size()); - for (auto i : kj::indices(list_value)) - res[i] = convertNodeToField(list_value[i]); - - return res; - } - case capnp::DynamicValue::ENUM: - return value.as().getRaw(); - case capnp::DynamicValue::STRUCT: - { - auto struct_value = value.as(); - const auto & fields = struct_value.getSchema().getFields(); - - Tuple tuple(fields.size()); - for (auto i : kj::indices(fields)) - tuple[i] = convertNodeToField(struct_value.get(fields[i])); - - return tuple; - } - case capnp::DynamicValue::CAPABILITY: - throw Exception("CAPABILITY type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - case capnp::DynamicValue::ANY_POINTER: - throw Exception("ANY_POINTER type not supported", ErrorCodes::BAD_TYPE_OF_FIELD); - } - return Field(); -} - -static capnp::StructSchema::Field getFieldOrThrow(capnp::StructSchema node, const std::string & field) -{ - KJ_IF_MAYBE(child, node.findFieldByName(field)) - return *child; - else - throw Exception("Field " + field + " doesn't exist in schema " + node.getShortDisplayName().cStr(), ErrorCodes::THERE_IS_NO_COLUMN); -} - - -void CapnProtoRowInputFormat::createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader) -{ - /// Columns in a table can map to fields in Cap'n'Proto or to structs. - - /// Store common parents and their tokens in order to backtrack. 
- std::vector parents; - std::vector parent_tokens; - - capnp::StructSchema cur_reader = reader; - - for (const auto & field : sorted_fields) - { - if (field.tokens.empty()) - throw Exception("Logical error in CapnProtoRowInputFormat", ErrorCodes::LOGICAL_ERROR); - - // Backtrack to common parent - while (field.tokens.size() < parent_tokens.size() + 1 - || !std::equal(parent_tokens.begin(), parent_tokens.end(), field.tokens.begin())) - { - actions.push_back({Action::POP}); - parents.pop_back(); - parent_tokens.pop_back(); - - if (parents.empty()) - { - cur_reader = reader; - break; - } - else - cur_reader = parents.back().getType().asStruct(); - } - - // Go forward - while (parent_tokens.size() + 1 < field.tokens.size()) - { - const auto & token = field.tokens[parents.size()]; - auto node = getFieldOrThrow(cur_reader, token); - if (node.getType().isStruct()) - { - // Descend to field structure - parents.emplace_back(node); - parent_tokens.emplace_back(token); - cur_reader = node.getType().asStruct(); - actions.push_back({Action::PUSH, node}); - } - else if (node.getType().isList()) - { - break; // Collect list - } - else - throw Exception("Field " + token + " is neither Struct nor List", ErrorCodes::BAD_TYPE_OF_FIELD); - } - - // Read field from the structure - auto node = getFieldOrThrow(cur_reader, field.tokens[parents.size()]); - if (node.getType().isList() && !actions.empty() && actions.back().field == node) - { - // The field list here flattens Nested elements into multiple arrays - // In order to map Nested types in Cap'nProto back, they need to be collected - // Since the field names are sorted, the order of field positions must be preserved - // For example, if the fields are { b @0 :Text, a @1 :Text }, the `a` would come first - // even though it's position is second. - auto & columns = actions.back().columns; - auto it = std::upper_bound(columns.cbegin(), columns.cend(), field.pos); - columns.insert(it, field.pos); - } - else - { - actions.push_back({Action::READ, node, {field.pos}}); - } - } -} - -CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info) - : IRowInputFormat(std::move(header), in_, std::move(params_)), parser(std::make_shared()) +CapnProtoRowInputFormat::CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_) + : IRowInputFormat(std::move(header), in_, std::move(params_)) + , parser(std::make_shared()) + , format_settings(format_settings_) + , column_types(getPort().getHeader().getDataTypes()) + , column_names(getPort().getHeader().getNames()) { // Parse the schema and fetch the root object - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - auto schema = parser->impl.parseDiskFile(info.schemaPath(), info.absoluteSchemaPath(), {}); -#pragma GCC diagnostic pop - - root = schema.getNested(info.messageName()).asStruct(); - - /** - * The schema typically consists of fields in various nested structures. - * Here we gather the list of fields and sort them in a way so that fields in the same structure are adjacent, - * and the nesting level doesn't decrease to make traversal easier. - */ - const auto & sample = getPort().getHeader(); - NestedFieldList list; - size_t num_columns = sample.columns(); - for (size_t i = 0; i < num_columns; ++i) - list.push_back(split(sample, i)); - - // Order list first by value of strings then by length of string vector. 
- std::sort(list.begin(), list.end(), [](const NestedField & a, const NestedField & b) { return a.tokens < b.tokens; }); - createActions(list, root); + root = parser->getMessageSchema(info); + checkCapnProtoSchemaStructure(root, getPort().getHeader(), format_settings.capn_proto.enum_comparing_mode); } kj::Array CapnProtoRowInputFormat::readMessage() @@ -233,6 +74,191 @@ kj::Array CapnProtoRowInputFormat::readMessage() return msg; } +static void insertSignedInteger(IColumn & column, const DataTypePtr & column_type, Int64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Int8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Int64: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime64: + assert_cast &>(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a signed integer."); + } +} + +static void insertUnsignedInteger(IColumn & column, const DataTypePtr & column_type, UInt64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::UInt8: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + assert_cast(column).insertValue(value); + break; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::UInt64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not an unsigned integer."); + } +} + +static void insertFloat(IColumn & column, const DataTypePtr & column_type, Float64 value) +{ + switch (column_type->getTypeId()) + { + case TypeIndex::Float32: + assert_cast(column).insertValue(value); + break; + case TypeIndex::Float64: + assert_cast(column).insertValue(value); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column type is not a float."); + } +} + +template +static void insertString(IColumn & column, Value value) +{ + column.insertData(reinterpret_cast(value.begin()), value.size()); +} + +template +static void insertEnum(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicEnum & enum_value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + auto enumerant = *kj::_::readMaybe(enum_value.getEnumerant()); + auto enum_type = assert_cast *>(column_type.get()); + DataTypePtr nested_type = std::make_shared>(); + switch (enum_comparing_mode) + { + case FormatSettings::EnumComparingMode::BY_VALUES: + insertSignedInteger(column, nested_type, Int64(enumerant.getOrdinal())); + return; + case FormatSettings::EnumComparingMode::BY_NAMES: + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(String(enumerant.getProto().getName())))); + return; + case FormatSettings::EnumComparingMode::BY_NAMES_CASE_INSENSITIVE: + { + /// Find the same enum name case insensitive. 
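    /// (Editorial note, not part of the patch.) For orientation: the BY_VALUES case
    /// above reuses the CapnProto enumerant ordinal as the ClickHouse enum value,
    /// the BY_NAMES case looks the value up by the exact enumerant name, and this
    /// BY_NAMES_CASE_INSENSITIVE case scans the registered ClickHouse enum names
    /// and takes the first one that matches ignoring case (compareEnumNames below).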
+ String enum_name = enumerant.getProto().getName(); + for (auto & name : enum_type->getAllRegisteredNames()) + { + if (compareEnumNames(name, enum_name, enum_comparing_mode)) + { + insertSignedInteger(column, nested_type, Int64(enum_type->getValue(name))); + break; + } + } + } + } +} + +static void insertValue(IColumn & column, const DataTypePtr & column_type, const capnp::DynamicValue::Reader & value, FormatSettings::EnumComparingMode enum_comparing_mode) +{ + if (column_type->lowCardinality()) + { + auto & lc_column = assert_cast(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + auto dict_type = assert_cast(column_type.get())->getDictionaryType(); + insertValue(*tmp_column, dict_type, value, enum_comparing_mode); + lc_column.insertFromFullColumn(*tmp_column, 0); + return; + } + + switch (value.getType()) + { + case capnp::DynamicValue::Type::INT: + insertSignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::UINT: + insertUnsignedInteger(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::FLOAT: + insertFloat(column, column_type, value.as()); + break; + case capnp::DynamicValue::Type::BOOL: + insertUnsignedInteger(column, column_type, UInt64(value.as())); + break; + case capnp::DynamicValue::Type::DATA: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::TEXT: + insertString(column, value.as()); + break; + case capnp::DynamicValue::Type::ENUM: + if (column_type->getTypeId() == TypeIndex::Enum8) + insertEnum(column, column_type, value.as(), enum_comparing_mode); + else + insertEnum(column, column_type, value.as(), enum_comparing_mode); + break; + case capnp::DynamicValue::LIST: + { + auto list_value = value.as(); + auto & column_array = assert_cast(column); + auto & offsets = column_array.getOffsets(); + offsets.push_back(offsets.back() + list_value.size()); + + auto & nested_column = column_array.getData(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + for (const auto & nested_value : list_value) + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + break; + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_value = value.as(); + if (column_type->isNullable()) + { + auto & nullable_column = assert_cast(column); + auto field = *kj::_::readMaybe(struct_value.which()); + if (field.getType().isVoid()) + nullable_column.insertDefault(); + else + { + auto & nested_column = nullable_column.getNestedColumn(); + auto nested_type = assert_cast(column_type.get())->getNestedType(); + auto nested_value = struct_value.get(field); + insertValue(nested_column, nested_type, nested_value, enum_comparing_mode); + nullable_column.getNullMapData().push_back(0); + } + } + else + { + auto & tuple_column = assert_cast(column); + const auto * tuple_type = assert_cast(column_type.get()); + for (size_t i = 0; i != tuple_column.tupleSize(); ++i) + insertValue( + tuple_column.getColumn(i), + tuple_type->getElements()[i], + struct_value.get(tuple_type->getElementNames()[i]), + enum_comparing_mode); + } + break; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto value type."); + } +} + bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { if (in->eof()) @@ -245,51 +271,12 @@ bool CapnProtoRowInputFormat::readRow(MutableColumns & columns, RowReadExtension #else capnp::FlatArrayMessageReader msg(array); #endif - std::vector stack; - stack.push_back(msg.getRoot(root)); - 
for (auto action : actions) + auto root_reader = msg.getRoot(root); + for (size_t i = 0; i != columns.size(); ++i) { - switch (action.type) - { - case Action::READ: - { - Field value = convertNodeToField(stack.back().get(action.field)); - if (action.columns.size() > 1) - { - // Nested columns must be flattened into several arrays - // e.g. Array(Tuple(x ..., y ...)) -> Array(x ...), Array(y ...) - const auto & collected = DB::get(value); - size_t size = collected.size(); - // The flattened array contains an array of a part of the nested tuple - Array flattened(size); - for (size_t column_index = 0; column_index < action.columns.size(); ++column_index) - { - // Populate array with a single tuple elements - for (size_t off = 0; off < size; ++off) - { - const auto & tuple = DB::get(collected[off]); - flattened[off] = tuple[column_index]; - } - auto & col = columns[action.columns[column_index]]; - col->insert(flattened); - } - } - else - { - auto & col = columns[action.columns[0]]; - col->insert(value); - } - - break; - } - case Action::POP: - stack.pop_back(); - break; - case Action::PUSH: - stack.push_back(stack.back().get(action.field).as()); - break; - } + auto value = getReaderByColumnName(root_reader, column_names[i]); + insertValue(*columns[i], column_types[i], value, format_settings.capn_proto.enum_comparing_mode); } return true; @@ -302,8 +289,7 @@ void registerInputFormatCapnProto(FormatFactory & factory) [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "CapnProto", true, - settings.schema.is_server, settings.schema.format_schema_path)); + FormatSchemaInfo(settings, "CapnProto", true), settings); }); } diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 0957cd1d681..fc30cf11237 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -4,8 +4,8 @@ #if USE_CAPNP #include +#include #include -#include namespace DB { @@ -22,18 +22,7 @@ class ReadBuffer; class CapnProtoRowInputFormat : public IRowInputFormat { public: - struct NestedField - { - std::vector tokens; - size_t pos; - }; - using NestedFieldList = std::vector; - - /** schema_dir - base path for schema files - * schema_file - location of the capnproto schema, e.g. "schema.capnp" - * root_object - name to the root object, e.g. "Message" - */ - CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info); + CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); String getName() const override { return "CapnProtoRowInputFormat"; } @@ -42,34 +31,11 @@ public: private: kj::Array readMessage(); - // Build a traversal plan from a sorted list of fields - void createActions(const NestedFieldList & sorted_fields, capnp::StructSchema reader); - - /* Action for state machine for traversing nested structures. */ - using BlockPositionList = std::vector; - struct Action - { - enum Type { POP, PUSH, READ }; - Type type{}; - capnp::StructSchema::Field field{}; - BlockPositionList columns{}; - }; - - // Wrapper for classes that could throw in destructor - // https://github.com/capnproto/capnproto/issues/553 - template - struct DestructorCatcher - { - T impl; - template - DestructorCatcher(Arg && ... 
args) : impl(kj::fwd(args)...) {} - ~DestructorCatcher() noexcept try { } catch (...) { return; } - }; - using SchemaParser = DestructorCatcher; - - std::shared_ptr parser; + std::shared_ptr parser; capnp::StructSchema root; - std::vector actions; + const FormatSettings format_settings; + DataTypes column_types; + Names column_names; }; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp new file mode 100644 index 00000000000..58f88c5c7cf --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -0,0 +1,268 @@ +#include +#if USE_CAPNP + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + +CapnProtoOutputStream::CapnProtoOutputStream(WriteBuffer & out_) : out(out_) +{ +} + +void CapnProtoOutputStream::write(const void * buffer, size_t size) +{ + out.write(reinterpret_cast(buffer), size); +} + +CapnProtoRowOutputFormat::CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), column_names(header_.getNames()), column_types(header_.getDataTypes()), output_stream(std::make_unique(out_)), format_settings(format_settings_) +{ + schema = schema_parser.getMessageSchema(info); + checkCapnProtoSchemaStructure(schema, getPort(PortKind::Main).getHeader(), format_settings.capn_proto.enum_comparing_mode); +} + +template +static capnp::DynamicEnum getDynamicEnum( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + const capnp::EnumSchema & enum_schema, + FormatSettings::EnumComparingMode mode) +{ + const auto * enum_data_type = assert_cast *>(data_type.get()); + EnumValue enum_value = column->getInt(row_num); + if (mode == FormatSettings::EnumComparingMode::BY_VALUES) + return capnp::DynamicEnum(enum_schema, enum_value); + + auto enum_name = enum_data_type->getNameForValue(enum_value); + for (const auto enumerant : enum_schema.getEnumerants()) + { + if (compareEnumNames(String(enum_name), enumerant.getProto().getName(), mode)) + return capnp::DynamicEnum(enumerant); + } + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot convert CLickHouse Enum value to CapnProto Enum"); +} + +static capnp::DynamicValue::Builder initStructFieldBuilder(const ColumnPtr & column, size_t row_num, capnp::DynamicStruct::Builder & struct_builder, capnp::StructSchema::Field field) +{ + if (const auto * array_column = checkAndGetColumn(*column)) + { + size_t size = array_column->getOffsets()[row_num] - array_column->getOffsets()[row_num - 1]; + return struct_builder.init(field, size); + } + + if (field.getType().isStruct()) + return struct_builder.init(field); + + return struct_builder.get(field); +} + +static std::optional convertToDynamicValue( + const ColumnPtr & column, + const DataTypePtr & data_type, + size_t row_num, + capnp::DynamicValue::Builder builder, + FormatSettings::EnumComparingMode enum_comparing_mode, + std::vector> & temporary_text_data_storage) +{ + /// Here we don't do any types validation, because we did it in CapnProtoRowOutputFormat constructor. 
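    /// (Editorial note, not part of the patch.) The constructor above calls
    /// checkCapnProtoSchemaStructure(), which verifies that each column type is
    /// compatible with its CapnProto counterpart, so the per-row conversion below
    /// can assume the types already match.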
+ + if (data_type->lowCardinality()) + { + const auto * lc_column = assert_cast(column.get()); + const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); + size_t index = lc_column->getIndexAt(row_num); + return convertToDynamicValue(lc_column->getDictionary().getNestedColumn(), dict_type, index, builder, enum_comparing_mode, temporary_text_data_storage); + } + + switch (builder.getType()) + { + case capnp::DynamicValue::Type::INT: + /// We allow outputting DateTime64 as Int64. + if (WhichDataType(data_type).isDateTime64()) + return capnp::DynamicValue::Reader(assert_cast *>(column.get())->getElement(row_num)); + return capnp::DynamicValue::Reader(column->getInt(row_num)); + case capnp::DynamicValue::Type::UINT: + return capnp::DynamicValue::Reader(column->getUInt(row_num)); + case capnp::DynamicValue::Type::BOOL: + return capnp::DynamicValue::Reader(column->getBool(row_num)); + case capnp::DynamicValue::Type::FLOAT: + return capnp::DynamicValue::Reader(column->getFloat64(row_num)); + case capnp::DynamicValue::Type::ENUM: + { + auto enum_schema = builder.as().getSchema(); + if (data_type->getTypeId() == TypeIndex::Enum8) + return capnp::DynamicValue::Reader( + getDynamicEnum(column, data_type, row_num, enum_schema, enum_comparing_mode)); + return capnp::DynamicValue::Reader( + getDynamicEnum(column, data_type, row_num, enum_schema, enum_comparing_mode)); + } + case capnp::DynamicValue::Type::DATA: + { + auto data = column->getDataAt(row_num); + return capnp::DynamicValue::Reader(capnp::Data::Reader(reinterpret_cast(data.data), data.size)); + } + case capnp::DynamicValue::Type::TEXT: + { + /// In the TEXT type, data should be null-terminated, but ClickHouse String data may not be. + /// To make the data null-terminated we should copy it to a temporary String object, but + /// capnp::Text::Reader works only with a pointer to the data and its size, so we should + /// guarantee that the new String object's lifetime is longer than the capnp::Text::Reader's lifetime. + /// To do this we store the new String object in a temporary storage, passed into this function + /// by reference. We use unique_ptr instead of just String to avoid pointer + /// invalidation on vector reallocation. + temporary_text_data_storage.push_back(std::make_unique(column->getDataAt(row_num))); + auto & data = temporary_text_data_storage.back(); + return capnp::DynamicValue::Reader(capnp::Text::Reader(data->data(), data->size())); + } + case capnp::DynamicValue::Type::STRUCT: + { + auto struct_builder = builder.as(); + auto nested_struct_schema = struct_builder.getSchema(); + /// A struct can represent a Tuple or a Nullable (a named union with two fields). + if (data_type->isNullable()) + { + const auto * nullable_type = assert_cast(data_type.get()); + const auto * nullable_column = assert_cast(column.get()); + auto fields = nested_struct_schema.getUnionFields(); + if (nullable_column->isNullAt(row_num)) + { + auto null_field = fields[0].getType().isVoid() ? fields[0] : fields[1]; + struct_builder.set(null_field, capnp::Void()); + } + else + { + auto value_field = fields[0].getType().isVoid() ?
fields[1] : fields[0]; + struct_builder.clear(value_field); + const auto & nested_column = nullable_column->getNestedColumnPtr(); + auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); + auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(value_field, std::move(*value)); + } + } + else + { + const auto * tuple_data_type = assert_cast(data_type.get()); + auto nested_types = tuple_data_type->getElements(); + const auto & nested_columns = assert_cast(column.get())->getColumns(); + for (const auto & name : tuple_data_type->getElementNames()) + { + auto pos = tuple_data_type->getPositionByName(name); + auto field_builder + = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); + auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(name, std::move(*value)); + } + } + return std::nullopt; + } + case capnp::DynamicValue::Type::LIST: + { + auto list_builder = builder.as(); + const auto * array_column = assert_cast(column.get()); + const auto & nested_column = array_column->getDataPtr(); + const auto & nested_type = assert_cast(data_type.get())->getNestedType(); + const auto & offsets = array_column->getOffsets(); + auto offset = offsets[row_num - 1]; + size_t size = offsets[row_num] - offset; + + const auto * nested_array_column = checkAndGetColumn(*nested_column); + for (size_t i = 0; i != size; ++i) + { + capnp::DynamicValue::Builder value_builder; + /// For nested arrays we need to initialize nested list builder. + if (nested_array_column) + { + const auto & nested_offset = nested_array_column->getOffsets(); + size_t nested_array_size = nested_offset[offset + i] - nested_offset[offset + i - 1]; + value_builder = list_builder.init(i, nested_array_size); + } + else + value_builder = list_builder[i]; + + auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); + if (value) + list_builder.set(i, std::move(*value)); + } + return std::nullopt; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected CapnProto type."); + } +} + +void CapnProtoRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + capnp::MallocMessageBuilder message; + /// Temporary storage for data that will be outputted in fields with CapnProto type TEXT. + /// See comment in convertToDynamicValue() for more details. 
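The temporary_text_data_storage declared just below exists so that the bytes referenced by every capnp::Text::Reader stay alive, and at a stable address, until the whole message is written. A rough stand-alone sketch of that pointer-stability argument; BorrowedText is a hypothetical stand-in for a non-owning reader, not a capnp type:

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// A "reader" that, like capnp::Text::Reader, only remembers a pointer and a size.
struct BorrowedText
{
    const char * data;
    std::size_t size;
};

int main()
{
    std::vector<std::unique_ptr<std::string>> storage;
    std::vector<BorrowedText> readers;

    for (int i = 0; i < 1000; ++i)
    {
        // Each string lives on the heap behind a unique_ptr, so growing the outer
        // vector only moves pointers; the character buffers themselves never move.
        storage.push_back(std::make_unique<std::string>("row_" + std::to_string(i)));
        readers.push_back({storage.back()->data(), storage.back()->size()});
    }

    // Every reader is still valid even though `storage` reallocated many times.
    std::cout << std::string(readers.front().data, readers.front().size) << ' '
              << std::string(readers.back().data, readers.back().size) << '\n';
}

With a plain vector of String the String objects themselves would be moved on reallocation, and short strings keep their bytes inside the object, so previously taken pointers could dangle; hence the unique_ptr indirection.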
+ std::vector> temporary_text_data_storage; + capnp::DynamicStruct::Builder root = message.initRoot(schema); + for (size_t i = 0; i != columns.size(); ++i) + { + auto [struct_builder, field] = getStructBuilderAndFieldByColumnName(root, column_names[i]); + auto field_builder = initStructFieldBuilder(columns[i], row_num, struct_builder, field); + auto value = convertToDynamicValue(columns[i], column_types[i], row_num, field_builder, format_settings.capn_proto.enum_comparing_mode, temporary_text_data_storage); + if (value) + struct_builder.set(field, *value); + } + + capnp::writeMessage(*output_stream, message); +} + +void registerOutputFormatCapnProto(FormatFactory & factory) +{ + factory.registerOutputFormat("CapnProto", []( + WriteBuffer & buf, + const Block & sample, + const RowOutputFormatParams & params, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, params, FormatSchemaInfo(format_settings, "CapnProto", true), format_settings); + }); +} + +} + +#else + +namespace DB +{ +class FormatFactory; +void registerOutputFormatCapnProto(FormatFactory &) {} +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h new file mode 100644 index 00000000000..0f321071d62 --- /dev/null +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -0,0 +1,53 @@ +#pragma once + +#include "config_formats.h" +#if USE_CAPNP + +#include +#include +#include +#include +#include +#include + +namespace DB +{ +class CapnProtoOutputStream : public kj::OutputStream +{ +public: + CapnProtoOutputStream(WriteBuffer & out_); + + void write(const void * buffer, size_t size) override; + +private: + WriteBuffer & out; +}; + +class CapnProtoRowOutputFormat : public IRowOutputFormat +{ +public: + CapnProtoRowOutputFormat( + WriteBuffer & out_, + const Block & header_, + const RowOutputFormatParams & params_, + const FormatSchemaInfo & info, + const FormatSettings & format_settings_); + + String getName() const override { return "CapnProtoRowOutputFormat"; } + + void write(const Columns & columns, size_t row_num) override; + + void writeField(const IColumn &, const ISerialization &, size_t) override { } + +private: + Names column_names; + DataTypes column_types; + capnp::StructSchema schema; + std::unique_ptr output_stream; + const FormatSettings format_settings; + CapnProtoSchemaParser schema_parser; +}; + +} + +#endif // USE_CAPNP diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 5daa38967b9..07cf4670981 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp index a5e6b7ec480..df7b7102739 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.cpp @@ -67,8 +67,7 @@ void registerInputFormatProtobuf(FormatFactory & factory) const FormatSettings & settings) { return std::make_shared(buf, sample, std::move(params), - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", true, - settings.schema.is_server, settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), with_length_delimiter); }); } diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp 
b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 12c5e98797a..29cd9be79bc 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -64,9 +64,7 @@ void registerOutputFormatProtobuf(FormatFactory & factory) { return std::make_shared( buf, header, params, - FormatSchemaInfo(settings.schema.format_schema, "Protobuf", - true, settings.schema.is_server, - settings.schema.format_schema_path), + FormatSchemaInfo(settings, "Protobuf", true), settings, with_length_delimiter); }); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 1ff52c9f695..c1cf0a904ea 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/src/Processors/Formats/InputStreamFromInputFormat.h b/src/Processors/Formats/InputStreamFromInputFormat.h deleted file mode 100644 index 339f559ac9b..00000000000 --- a/src/Processors/Formats/InputStreamFromInputFormat.h +++ /dev/null @@ -1,67 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -class InputStreamFromInputFormat : public IBlockInputStream -{ -public: - explicit InputStreamFromInputFormat(InputFormatPtr input_format_) - : input_format(std::move(input_format_)) - , port(input_format->getPort().getHeader(), input_format.get()) - { - connect(input_format->getPort(), port); - port.setNeeded(); - } - - String getName() const override { return input_format->getName(); } - Block getHeader() const override { return input_format->getPort().getHeader(); } - - void cancel(bool kill) override - { - input_format->cancel(); - IBlockInputStream::cancel(kill); - } - - const BlockMissingValues & getMissingValues() const override { return input_format->getMissingValues(); } - -protected: - - Block readImpl() override - { - while (true) - { - auto status = input_format->prepare(); - - switch (status) - { - case IProcessor::Status::Ready: - input_format->work(); - break; - - case IProcessor::Status::Finished: - return {}; - - case IProcessor::Status::PortFull: - return input_format->getPort().getHeader().cloneWithColumns(port.pull().detachColumns()); - - case IProcessor::Status::NeedData: - case IProcessor::Status::Async: - case IProcessor::Status::ExpandPipeline: - throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); - } - } - } - -private: - InputFormatPtr input_format; - InputPort port; -}; - -} diff --git a/src/Processors/Formats/LazyOutputFormat.h b/src/Processors/Formats/LazyOutputFormat.h index 50dc87f2e70..123d393a871 100644 --- a/src/Processors/Formats/LazyOutputFormat.h +++ b/src/Processors/Formats/LazyOutputFormat.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include namespace DB @@ -25,7 +25,7 @@ public: bool isFinished() { return queue.isFinishedAndEmpty(); } - BlockStreamProfileInfo & getProfileInfo() { return info; } + ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; @@ -59,7 +59,7 @@ private: /// Is not used. 
static WriteBuffer out; - BlockStreamProfileInfo info; + ProfileInfo info; }; } diff --git a/src/Processors/Formats/OutputStreamToOutputFormat.cpp b/src/Processors/Formats/OutputStreamToOutputFormat.cpp deleted file mode 100644 index 5d4e7832327..00000000000 --- a/src/Processors/Formats/OutputStreamToOutputFormat.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include -#include - -namespace DB -{ - -Block OutputStreamToOutputFormat::getHeader() const -{ - return output_format->getPort(IOutputFormat::PortKind::Main).getHeader(); -} - -void OutputStreamToOutputFormat::write(const Block & block) -{ - output_format->write(block); -} - -void OutputStreamToOutputFormat::writePrefix() { output_format->doWritePrefix(); } -void OutputStreamToOutputFormat::writeSuffix() { output_format->doWriteSuffix(); } - -void OutputStreamToOutputFormat::flush() { output_format->flush(); } - -void OutputStreamToOutputFormat::setRowsBeforeLimit(size_t rows_before_limit) -{ - output_format->setRowsBeforeLimit(rows_before_limit); -} - -void OutputStreamToOutputFormat::setTotals(const Block & totals) -{ - if (totals) - output_format->setTotals(totals); -} - -void OutputStreamToOutputFormat::setExtremes(const Block & extremes) -{ - if (extremes) - output_format->setExtremes(extremes); -} - -void OutputStreamToOutputFormat::onProgress(const Progress & progress) { output_format->onProgress(progress); } - -std::string OutputStreamToOutputFormat::getContentType() const { return output_format->getContentType(); } - -} diff --git a/src/Processors/Formats/OutputStreamToOutputFormat.h b/src/Processors/Formats/OutputStreamToOutputFormat.h deleted file mode 100644 index a85de12b49d..00000000000 --- a/src/Processors/Formats/OutputStreamToOutputFormat.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once -#include - -namespace DB -{ - - -class IOutputFormat; - -using OutputFormatPtr = std::shared_ptr; - -/// Wrapper. Implements IBlockOutputStream interface using IOutputFormat object. -class OutputStreamToOutputFormat : public IBlockOutputStream -{ -public: - explicit OutputStreamToOutputFormat(OutputFormatPtr output_format_) : output_format(std::move(output_format_)) {} - - Block getHeader() const override; - - void write(const Block & block) override; - - void writePrefix() override; - void writeSuffix() override; - - void flush() override; - - void setRowsBeforeLimit(size_t rows_before_limit) override; - void setTotals(const Block & totals) override; - void setExtremes(const Block & extremes) override; - - void onProgress(const Progress & progress) override; - - std::string getContentType() const override; - -private: - OutputFormatPtr output_format; -}; - -} diff --git a/src/Processors/Formats/PullingOutputFormat.h b/src/Processors/Formats/PullingOutputFormat.h index 53b2086712f..a231b7679f3 100644 --- a/src/Processors/Formats/PullingOutputFormat.h +++ b/src/Processors/Formats/PullingOutputFormat.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { @@ -20,7 +20,7 @@ public: Chunk getTotals(); Chunk getExtremes(); - BlockStreamProfileInfo & getProfileInfo() { return info; } + ProfileInfo & getProfileInfo() { return info; } void setRowsBeforeLimit(size_t rows_before_limit) override; @@ -38,7 +38,7 @@ private: std::atomic_bool & has_data_flag; - BlockStreamProfileInfo info; + ProfileInfo info; /// Is not used. 
static WriteBuffer out; diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index e8bf90c2b31..b0cdf4c8a3c 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -16,7 +16,7 @@ public: const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index 87c466f31e8..a37e1c8402f 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -20,7 +20,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/FinishAggregatingInOrderTransform.h b/src/Processors/Merges/FinishAggregatingInOrderTransform.h index 6d5e334311f..58bd399afda 100644 --- a/src/Processors/Merges/FinishAggregatingInOrderTransform.h +++ b/src/Processors/Merges/FinishAggregatingInOrderTransform.h @@ -19,7 +19,7 @@ public: SortDescription description, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, params, diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index 46272f00eed..e6307c629ea 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -15,7 +15,7 @@ public: SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index cba78390c97..52acf36a4d7 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -15,10 +15,10 @@ IMergingTransformBase::IMergingTransformBase( const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_) + UInt64 limit_hint_) : IProcessor(InputPorts(num_inputs, input_header), {output_header}) , have_all_inputs(have_all_inputs_) - , has_limit_below_one_block(has_limit_below_one_block_) + , limit_hint(limit_hint_) { } @@ -79,7 +79,10 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs() /// setNotNeeded after reading first chunk, because in optimismtic case /// (e.g. 
with optimized 'ORDER BY primary_key LIMIT n' and small 'n') /// we won't have to read any chunks anymore; - auto chunk = input.pull(has_limit_below_one_block); + auto chunk = input.pull(limit_hint != 0); + if (limit_hint && chunk.getNumRows() < limit_hint) + input.setNeeded(); + if (!chunk.hasRows()) { if (!input.isFinished()) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 8b0a44ae025..4da49b8155c 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -17,7 +17,7 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_); + UInt64 limit_hint_); OutputPort & getOutputPort() { return outputs.front(); } @@ -67,7 +67,7 @@ private: std::vector input_states; std::atomic have_all_inputs; bool is_initialized = false; - bool has_limit_below_one_block = false; + UInt64 limit_hint = 0; IProcessor::Status prepareInitializeInputs(); }; @@ -83,9 +83,9 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - bool has_limit_below_one_block_, + UInt64 limit_hint_, Args && ... args) - : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, has_limit_below_one_block_) + : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_) , algorithm(std::forward(args) ...) { } diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 667972e3cf6..ec864b561e9 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -13,13 +13,12 @@ MergingSortedTransform::MergingSortedTransform( SortDescription description_, size_t max_block_size, UInt64 limit_, - bool has_limit_below_one_block_, WriteBuffer * out_row_sources_buf_, bool quiet_, bool use_average_block_sizes, bool have_all_inputs_) : IMergingTransform( - num_inputs, header, header, have_all_inputs_, has_limit_below_one_block_, + num_inputs, header, header, have_all_inputs_, limit_, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 1fa9b1275bd..93bd36d8aec 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -17,7 +17,6 @@ public: SortDescription description, size_t max_block_size, UInt64 limit_ = 0, - bool has_limit_below_one_block_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false, bool use_average_block_sizes = false, diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index e760cdf0d2b..dfb386684fc 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -18,7 +18,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 0287caed5aa..0530ac2e96b 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -19,7 +19,7 
@@ public: const Names & partition_key_columns, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index f260e20f1da..5eced1cb58d 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -19,7 +19,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, header, num_inputs, std::move(description_), diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 023f9016cc4..53036360b8d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 8583e5be485..c7d67c75894 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp index 35b974baa83..3ca5b9109e6 100644 --- a/src/Processors/QueryPlan/ArrayJoinStep.cpp +++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 83a4c291bf2..45c3719ebca 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/CreatingSetsStep.h b/src/Processors/QueryPlan/CreatingSetsStep.h index 8d20c764e8a..e20c28e10f4 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.h +++ b/src/Processors/QueryPlan/CreatingSetsStep.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp index 3d61d3ef36b..23c5115ec68 100644 --- a/src/Processors/QueryPlan/CubeStep.cpp +++ b/src/Processors/QueryPlan/CubeStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/CubeStep.h b/src/Processors/QueryPlan/CubeStep.h index 45077d78a90..1079bed5398 100644 --- a/src/Processors/QueryPlan/CubeStep.h +++ b/src/Processors/QueryPlan/CubeStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp index d53d1fa9310..5aeb33fdc7b 100644 --- a/src/Processors/QueryPlan/DistinctStep.cpp +++ b/src/Processors/QueryPlan/DistinctStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/DistinctStep.h b/src/Processors/QueryPlan/DistinctStep.h index b08e93dffa9..a48a779425d 
100644 --- a/src/Processors/QueryPlan/DistinctStep.h +++ b/src/Processors/QueryPlan/DistinctStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp index b4ff1a1281c..33d2ad6e1cf 100644 --- a/src/Processors/QueryPlan/ExpressionStep.cpp +++ b/src/Processors/QueryPlan/ExpressionStep.cpp @@ -1,10 +1,9 @@ #include #include -#include +#include #include #include #include -#include #include #include diff --git a/src/Processors/QueryPlan/ExtremesStep.cpp b/src/Processors/QueryPlan/ExtremesStep.cpp index 117ccd414ca..4524b9883d6 100644 --- a/src/Processors/QueryPlan/ExtremesStep.cpp +++ b/src/Processors/QueryPlan/ExtremesStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 204559ecc3b..223892aa528 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 483055810cf..df75c37dc97 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/FinishSortingStep.cpp b/src/Processors/QueryPlan/FinishSortingStep.cpp deleted file mode 100644 index c219c09f3bd..00000000000 --- a/src/Processors/QueryPlan/FinishSortingStep.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = true, - .preserves_number_of_streams = false, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -FinishSortingStep::FinishSortingStep( - const DataStream & input_stream_, - SortDescription prefix_description_, - SortDescription result_description_, - size_t max_block_size_, - UInt64 limit_, - bool has_filtration_) - : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) - , prefix_description(std::move(prefix_description_)) - , result_description(std::move(result_description_)) - , max_block_size(max_block_size_) - , limit(limit_) - , has_filtration(has_filtration_) -{ - /// TODO: check input_stream is sorted by prefix_description. - output_stream->sort_description = result_description; - output_stream->sort_mode = DataStream::SortMode::Stream; -} - -void FinishSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void FinishSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - bool need_finish_sorting = (prefix_description.size() < result_description.size()); - if (pipeline.getNumStreams() > 1) - { - UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); - bool has_limit_below_one_block = !has_filtration && limit_for_merging && limit_for_merging < max_block_size; - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - prefix_description, - max_block_size, - limit_for_merging, - has_limit_below_one_block); - - pipeline.addTransform(std::move(transform)); - } - - if (need_finish_sorting) - { - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - return std::make_shared(header, result_description, limit); - }); - - /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform - pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr - { - return std::make_shared( - header, prefix_description, result_description, max_block_size, limit); - }); - } -} - -void FinishSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - - settings.out << prefix << "Prefix sort description: "; - dumpSortDescription(prefix_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - settings.out << prefix << "Result sort description: "; - dumpSortDescription(result_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void FinishSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Prefix Sort Description", explainSortDescription(prefix_description, input_streams.front().header)); - map.add("Result Sort Description", explainSortDescription(result_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/FinishSortingStep.h b/src/Processors/QueryPlan/FinishSortingStep.h deleted file mode 100644 index ac34aea9df4..00000000000 --- a/src/Processors/QueryPlan/FinishSortingStep.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once -#include -#include - -namespace DB -{ - -/// Finish sorting of pre-sorted data. See FinishSortingTransform. -class FinishSortingStep : public ITransformingStep -{ -public: - FinishSortingStep( - const DataStream & input_stream_, - SortDescription prefix_description_, - SortDescription result_description_, - size_t max_block_size_, - UInt64 limit_, - bool has_filtration_); - - String getName() const override { return "FinishSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. 
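For context on what the removed FinishSortingStep (now the FinishSorting mode of SortingStep) computes: the input is already ordered by a prefix of the required sort key, so only runs of rows with equal prefix values still need sorting by the remaining key columns. A rough stand-alone sketch of that idea under simplified types, not the actual transform:

#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

// Rows are already sorted by .first (the key prefix); produce rows sorted by
// (.first, .second) by sorting every run of equal prefix values separately.
static void finishSorting(std::vector<std::pair<int, int>> & rows)
{
    auto run_begin = rows.begin();
    while (run_begin != rows.end())
    {
        auto run_end = std::find_if(run_begin, rows.end(),
                                    [&](const auto & row) { return row.first != run_begin->first; });
        std::sort(run_begin, run_end, [](const auto & a, const auto & b) { return a.second < b.second; });
        run_begin = run_end;
    }
}

int main()
{
    std::vector<std::pair<int, int>> rows = {{1, 9}, {1, 2}, {2, 5}, {2, 1}, {3, 7}};
    finishSorting(rows);
    for (const auto & [a, b] : rows)
        std::cout << a << ',' << b << ' ';   // 1,2 1,9 2,1 2,5 3,7
    std::cout << '\n';
}

Since only bounded runs are sorted, the work can proceed in a streaming fashion, which is what distinguishes this mode from a full sort.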
- void updateLimit(size_t limit_); - -private: - SortDescription prefix_description; - SortDescription result_description; - size_t max_block_size; - UInt64 limit; - bool has_filtration; -}; - -} diff --git a/src/Processors/QueryPlan/ISourceStep.cpp b/src/Processors/QueryPlan/ISourceStep.cpp index 61c0a9254cd..0644d9b44eb 100644 --- a/src/Processors/QueryPlan/ISourceStep.cpp +++ b/src/Processors/QueryPlan/ISourceStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp index 1c7f836378f..629fb89be1e 100644 --- a/src/Processors/QueryPlan/ITransformingStep.cpp +++ b/src/Processors/QueryPlan/ITransformingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index a4d81e69fe0..c031303cc7f 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 9c5f8ae2e5f..494a2a6aa0e 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/LimitByStep.cpp b/src/Processors/QueryPlan/LimitByStep.cpp index 12ad933a159..39086e995fc 100644 --- a/src/Processors/QueryPlan/LimitByStep.cpp +++ b/src/Processors/QueryPlan/LimitByStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp index 3db59e0684a..8c5e3e3c87c 100644 --- a/src/Processors/QueryPlan/LimitStep.cpp +++ b/src/Processors/QueryPlan/LimitStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/MergeSortingStep.cpp b/src/Processors/QueryPlan/MergeSortingStep.cpp deleted file mode 100644 index 820bbc31b74..00000000000 --- a/src/Processors/QueryPlan/MergeSortingStep.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -MergeSortingStep::MergeSortingStep( - const DataStream & input_stream, - const SortDescription & description_, - size_t max_merged_block_size_, - UInt64 limit_, - size_t max_bytes_before_remerge_, - double remerge_lowered_memory_bytes_ratio_, - size_t max_bytes_before_external_sort_, - VolumePtr tmp_volume_, - size_t min_free_disk_space_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , description(description_) - , max_merged_block_size(max_merged_block_size_) - , limit(limit_) - , max_bytes_before_remerge(max_bytes_before_remerge_) - , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) - , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_) - , min_free_disk_space(min_free_disk_space_) -{ - /// TODO: check input_stream is partially sorted by the same description. 
- output_stream->sort_description = description; - output_stream->sort_mode = input_stream.has_single_port ? DataStream::SortMode::Stream - : DataStream::SortMode::Port; -} - -void MergeSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void MergeSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return nullptr; - - return std::make_shared( - header, description, max_merged_block_size, limit, - max_bytes_before_remerge / pipeline.getNumStreams(), - remerge_lowered_memory_bytes_ratio, - max_bytes_before_external_sort, - tmp_volume, - min_free_disk_space); - }); -} - -void MergeSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void MergeSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/MergeSortingStep.h b/src/Processors/QueryPlan/MergeSortingStep.h deleted file mode 100644 index d5daa041256..00000000000 --- a/src/Processors/QueryPlan/MergeSortingStep.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -/// Sorts stream of data. See MergeSortingTransform. -class MergeSortingStep : public ITransformingStep -{ -public: - explicit MergeSortingStep( - const DataStream & input_stream, - const SortDescription & description_, - size_t max_merged_block_size_, - UInt64 limit_, - size_t max_bytes_before_remerge_, - double remerge_lowered_memory_bytes_ratio_, - size_t max_bytes_before_external_sort_, - VolumePtr tmp_volume_, - size_t min_free_disk_space_); - - String getName() const override { return "MergeSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. 
- void updateLimit(size_t limit_); - -private: - SortDescription description; - size_t max_merged_block_size; - UInt64 limit; - - size_t max_bytes_before_remerge; - double remerge_lowered_memory_bytes_ratio; - size_t max_bytes_before_external_sort; - VolumePtr tmp_volume; - size_t min_free_disk_space; -}; - -} diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index d02be59ae84..8dfb9f9c923 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index 9171512571a..eeead41b5f9 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/MergingSortedStep.cpp b/src/Processors/QueryPlan/MergingSortedStep.cpp deleted file mode 100644 index 87d1af4d2bd..00000000000 --- a/src/Processors/QueryPlan/MergingSortedStep.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = true, - .preserves_number_of_streams = false, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -MergingSortedStep::MergingSortedStep( - const DataStream & input_stream, - SortDescription sort_description_, - size_t max_block_size_, - UInt64 limit_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , sort_description(std::move(sort_description_)) - , max_block_size(max_block_size_) - , limit(limit_) -{ - /// TODO: check input_stream is partially sorted (each port) by the same description. 
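The MergingSortedStep being folded into SortingStep performs a k-way merge of inputs that are each already sorted. A compact stand-alone sketch of that idea with a min-heap of per-stream cursors and an optional limit; the function and types here are illustrative only, not the ClickHouse transform:

#include <cstddef>
#include <functional>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

// Merge several individually sorted "streams" (plain vectors here) into one
// sorted sequence, optionally stopping once `limit` rows have been produced.
static std::vector<int> mergeSortedStreams(const std::vector<std::vector<int>> & streams, std::size_t limit = 0)
{
    using Cursor = std::pair<int, std::pair<std::size_t, std::size_t>>;  // value, (stream index, position)
    std::priority_queue<Cursor, std::vector<Cursor>, std::greater<>> heap;

    for (std::size_t s = 0; s < streams.size(); ++s)
        if (!streams[s].empty())
            heap.push({streams[s][0], {s, 0}});

    std::vector<int> result;
    while (!heap.empty() && (limit == 0 || result.size() < limit))
    {
        auto [value, position] = heap.top();
        heap.pop();
        result.push_back(value);
        auto [stream, index] = position;
        if (index + 1 < streams[stream].size())
            heap.push({streams[stream][index + 1], {stream, index + 1}});
    }
    return result;
}

int main()
{
    std::vector<std::vector<int>> streams = {{1, 4, 9}, {2, 3, 10}, {5, 6}};
    for (int value : mergeSortedStreams(streams, /*limit=*/5))
        std::cout << value << ' ';   // 1 2 3 4 5
    std::cout << '\n';
}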
- output_stream->sort_description = sort_description; - output_stream->sort_mode = DataStream::SortMode::Stream; -} - -void MergingSortedStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void MergingSortedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - /// If there are several streams, then we merge them into one - if (pipeline.getNumStreams() > 1) - { - - auto transform = std::make_shared( - pipeline.getHeader(), - pipeline.getNumStreams(), - sort_description, - max_block_size, limit); - - pipeline.addTransform(std::move(transform)); - } -} - -void MergingSortedStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(sort_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void MergingSortedStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(sort_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/MergingSortedStep.h b/src/Processors/QueryPlan/MergingSortedStep.h deleted file mode 100644 index 5d27e59ab76..00000000000 --- a/src/Processors/QueryPlan/MergingSortedStep.h +++ /dev/null @@ -1,36 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace DB -{ - -/// Merge streams of data into single sorted stream. -class MergingSortedStep : public ITransformingStep -{ -public: - explicit MergingSortedStep( - const DataStream & input_stream, - SortDescription sort_description_, - size_t max_block_size_, - UInt64 limit_ = 0); - - String getName() const override { return "MergingSorted"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. 
- void updateLimit(size_t limit_); - -private: - SortDescription sort_description; - size_t max_block_size; - UInt64 limit; -}; - -} diff --git a/src/Processors/QueryPlan/OffsetStep.cpp b/src/Processors/QueryPlan/OffsetStep.cpp index b48327eb36c..e0c70ba2f28 100644 --- a/src/Processors/QueryPlan/OffsetStep.cpp +++ b/src/Processors/QueryPlan/OffsetStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/src/Processors/QueryPlan/OffsetStep.h b/src/Processors/QueryPlan/OffsetStep.h index 488c55b6460..f16559bcfad 100644 --- a/src/Processors/QueryPlan/OffsetStep.h +++ b/src/Processors/QueryPlan/OffsetStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 687946659b6..e81cec723a1 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -7,10 +7,7 @@ #include #include #include -#include -#include -#include -#include +#include #include #include #include @@ -237,10 +234,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes // { // } - if (typeid_cast(child.get()) - || typeid_cast(child.get()) - || typeid_cast(child.get()) - || typeid_cast(child.get())) + if (typeid_cast(child.get())) { Names allowed_inputs = child->getOutputStream().header.getNames(); if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) diff --git a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp index 01af6a2bbde..eb65f49103b 100644 --- a/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/limitPushDown.cpp @@ -2,10 +2,7 @@ #include #include #include -#include -#include -#include -#include +#include #include namespace DB::QueryPlanOptimizations @@ -21,32 +18,15 @@ static bool tryUpdateLimitForSortingSteps(QueryPlan::Node * node, size_t limit) QueryPlan::Node * child = nullptr; bool updated = false; - if (auto * merging_sorted = typeid_cast(step.get())) + if (auto * sorting = typeid_cast(step.get())) { /// TODO: remove LimitStep here. - merging_sorted->updateLimit(limit); + sorting->updateLimit(limit); updated = true; child = node->children.front(); } - else if (auto * finish_sorting = typeid_cast(step.get())) - { - /// TODO: remove LimitStep here. - finish_sorting->updateLimit(limit); - updated = true; - } - else if (auto * merge_sorting = typeid_cast(step.get())) - { - merge_sorting->updateLimit(limit); - updated = true; - child = node->children.front(); - } - else if (auto * partial_sorting = typeid_cast(step.get())) - { - partial_sorting->updateLimit(limit); - updated = true; - } - /// We often have chain PartialSorting -> MergeSorting -> MergingSorted + /// In case we have several sorting steps. /// Try update limit for them also if possible. 
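Since the surviving branch above only has to handle SortingStep, the recursion that follows walks whatever chain of sorting steps remains. A toy sketch of the same "keep the smallest non-zero limit and recurse into the child" rule, using invented Node and pushDownLimit names rather than the real query-plan classes:

#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <utility>

// A toy plan node: only "sorting" nodes carry a limit that can be lowered; 0 means "no limit".
struct Node
{
    std::string name;
    bool is_sorting = false;
    std::size_t limit = 0;
    std::shared_ptr<Node> child;
};

// If the node is a sorting step, keep the smaller non-zero limit and continue
// with its child, so a whole chain of sorting steps receives the hint.
static void pushDownLimit(const std::shared_ptr<Node> & node, std::size_t limit)
{
    if (!node || limit == 0 || !node->is_sorting)
        return;
    if (node->limit == 0 || limit < node->limit)
        node->limit = limit;
    pushDownLimit(node->child, limit);
}

int main()
{
    auto make_sorting = [](std::string name, std::shared_ptr<Node> child)
    {
        auto node = std::make_shared<Node>();
        node->name = std::move(name);
        node->is_sorting = true;
        node->child = std::move(child);
        return node;
    };

    auto chain = make_sorting("MergingSorted", make_sorting("MergeSorting", make_sorting("PartialSorting", nullptr)));
    pushDownLimit(chain, 10);   // e.g. a limit taken from a LimitStep above the chain

    for (auto node = chain; node; node = node->child)
        std::cout << node->name << " limit=" << node->limit << '\n';
}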
if (child) tryUpdateLimitForSortingSteps(child, limit); diff --git a/src/Processors/QueryPlan/PartialSortingStep.cpp b/src/Processors/QueryPlan/PartialSortingStep.cpp deleted file mode 100644 index cf7cb157e4c..00000000000 --- a/src/Processors/QueryPlan/PartialSortingStep.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -static ITransformingStep::Traits getTraits(size_t limit) -{ - return ITransformingStep::Traits - { - { - .preserves_distinct_columns = true, - .returns_single_stream = false, - .preserves_number_of_streams = true, - .preserves_sorting = false, - }, - { - .preserves_number_of_rows = limit == 0, - } - }; -} - -PartialSortingStep::PartialSortingStep( - const DataStream & input_stream, - SortDescription sort_description_, - UInt64 limit_, - SizeLimits size_limits_) - : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) - , sort_description(std::move(sort_description_)) - , limit(limit_) - , size_limits(size_limits_) -{ - output_stream->sort_description = sort_description; - output_stream->sort_mode = DataStream::SortMode::Chunk; -} - -void PartialSortingStep::updateLimit(size_t limit_) -{ - if (limit_ && (limit == 0 || limit_ < limit)) - { - limit = limit_; - transform_traits.preserves_number_of_rows = false; - } -} - -void PartialSortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - return std::make_shared(header, sort_description, limit); - }); - - StreamLocalLimits limits; - limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 - limits.size_limits = size_limits; - - pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr - { - if (stream_type != QueryPipelineBuilder::StreamType::Main) - return nullptr; - - auto transform = std::make_shared(header, limits); - return transform; - }); -} - -void PartialSortingStep::describeActions(FormatSettings & settings) const -{ - String prefix(settings.offset, ' '); - settings.out << prefix << "Sort description: "; - dumpSortDescription(sort_description, input_streams.front().header, settings.out); - settings.out << '\n'; - - if (limit) - settings.out << prefix << "Limit " << limit << '\n'; -} - -void PartialSortingStep::describeActions(JSONBuilder::JSONMap & map) const -{ - map.add("Sort Description", explainSortDescription(sort_description, input_streams.front().header)); - - if (limit) - map.add("Limit", limit); -} - -} diff --git a/src/Processors/QueryPlan/PartialSortingStep.h b/src/Processors/QueryPlan/PartialSortingStep.h deleted file mode 100644 index bd8fd30ce02..00000000000 --- a/src/Processors/QueryPlan/PartialSortingStep.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#include -#include -#include - -namespace DB -{ - -/// Sort separate chunks of data. 
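The removed PartialSortingStep (now part of SortingStep's full-sort path) sorts every chunk of the stream independently before the merge stages. A stand-alone sketch of per-chunk sorting, including the common refinement that a known LIMIT lets each chunk use std::partial_sort; the names are illustrative, not the ClickHouse transform:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Sort one chunk in isolation. When a LIMIT n is known, only the n smallest
// values of each chunk can reach the final result, so ordering just the first
// n positions of the chunk is enough before chunks are merged downstream.
static void sortChunk(std::vector<int> & chunk, std::size_t limit)
{
    if (limit != 0 && limit < chunk.size())
        std::partial_sort(chunk.begin(), chunk.begin() + static_cast<std::ptrdiff_t>(limit), chunk.end());
    else
        std::sort(chunk.begin(), chunk.end());
}

int main()
{
    std::vector<std::vector<int>> chunks = {{7, 3, 9, 1}, {8, 2, 6, 4}};
    for (auto & chunk : chunks)
        sortChunk(chunk, /*limit=*/2);

    for (const auto & chunk : chunks)
    {
        // After partial sorting, the first two values of each chunk are its two smallest.
        std::cout << chunk[0] << ' ' << chunk[1] << "  ";
    }
    std::cout << '\n';   // 1 3  2 4
}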
-class PartialSortingStep : public ITransformingStep -{ -public: - explicit PartialSortingStep( - const DataStream & input_stream, - SortDescription sort_description_, - UInt64 limit_, - SizeLimits size_limits_); - - String getName() const override { return "PartialSorting"; } - - void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - void describeActions(JSONBuilder::JSONMap & map) const override; - void describeActions(FormatSettings & settings) const override; - - /// Add limit or change it to lower value. - void updateLimit(size_t limit_); - -private: - SortDescription sort_description; - UInt64 limit; - SizeLimits size_limits; -}; - -} diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index 6fb6a24f65b..f319e562bfb 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 8d3005e725f..519de724f9e 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -134,7 +135,6 @@ Pipe ReadFromMergeTree::readFromPool( data, metadata_snapshot, prewhere_info, - true, required_columns, backoff_settings, settings.preferred_block_size_bytes, @@ -173,7 +173,7 @@ ProcessorPtr ReadFromMergeTree::createSource( return std::make_shared( data, metadata_snapshot, part.data_part, max_block_size, preferred_block_size_bytes, preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info, - actions_settings, true, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block); + actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block); } Pipe ReadFromMergeTree::readInOrder( @@ -485,8 +485,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( pipe.getHeader(), pipe.numOutputPorts(), sort_description, - max_block_size, - 0, true); + max_block_size); pipe.addTransform(std::move(transform)); } @@ -507,38 +506,39 @@ static void addMergingFinal( const auto & header = pipe.getHeader(); size_t num_outputs = pipe.numOutputPorts(); + auto now = time(nullptr); + auto get_merging_processor = [&]() -> MergingTransformPtr { switch (merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - { return std::make_shared(header, num_outputs, - sort_description, max_block_size); - } + sort_description, max_block_size); case MergeTreeData::MergingParams::Collapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, true, max_block_size); + sort_description, merging_params.sign_column, true, max_block_size); case MergeTreeData::MergingParams::Summing: return std::make_shared(header, num_outputs, - sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); + sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size); case MergeTreeData::MergingParams::Aggregating: return std::make_shared(header, num_outputs, - sort_description, max_block_size); + sort_description, max_block_size); case MergeTreeData::MergingParams::Replacing: return std::make_shared(header, 
num_outputs, - sort_description, merging_params.version_column, max_block_size); + sort_description, merging_params.version_column, max_block_size); case MergeTreeData::MergingParams::VersionedCollapsing: return std::make_shared(header, num_outputs, - sort_description, merging_params.sign_column, max_block_size); + sort_description, merging_params.sign_column, max_block_size); case MergeTreeData::MergingParams::Graphite: - throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); + return std::make_shared(header, num_outputs, + sort_description, max_block_size, merging_params.graphite_params, now); } __builtin_unreachable(); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index c8213d58db6..fc8136177cf 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 407b968b728..bb6e814ad9f 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index cd2f42ece58..399e7d01839 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/ReadNothingStep.cpp b/src/Processors/QueryPlan/ReadNothingStep.cpp index 7019b88f0b2..253f3a5b980 100644 --- a/src/Processors/QueryPlan/ReadNothingStep.cpp +++ b/src/Processors/QueryPlan/ReadNothingStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/RollupStep.cpp b/src/Processors/QueryPlan/RollupStep.cpp index 114fe661c70..acaeb2bc9a7 100644 --- a/src/Processors/QueryPlan/RollupStep.cpp +++ b/src/Processors/QueryPlan/RollupStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/RollupStep.h b/src/Processors/QueryPlan/RollupStep.h index 2ff3040d7a7..7cd71fecdc1 100644 --- a/src/Processors/QueryPlan/RollupStep.h +++ b/src/Processors/QueryPlan/RollupStep.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp index 47f8187c3aa..4ca3d0ebf54 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include namespace DB diff --git a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h index b36ddfb3768..a8d1eef4b08 100644 --- a/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h +++ b/src/Processors/QueryPlan/SettingQuotaAndLimitsStep.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp new file mode 100644 index 00000000000..32b314b1c50 --- /dev/null +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -0,0 +1,241 @@ +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +namespace DB +{ + +static ITransformingStep::Traits getTraits(size_t limit) +{ + return ITransformingStep::Traits + { + { + .preserves_distinct_columns = true, + .returns_single_stream = true, + .preserves_number_of_streams = false, + .preserves_sorting = false, + }, + { + .preserves_number_of_rows = limit == 0, + } + }; +} + +SortingStep::SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + size_t max_block_size_, + UInt64 limit_, + SizeLimits size_limits_, + size_t max_bytes_before_remerge_, + double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, + VolumePtr tmp_volume_, + size_t min_free_disk_space_) + : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) + , type(Type::Full) + , result_description(description_) + , max_block_size(max_block_size_) + , limit(limit_) + , size_limits(size_limits_) + , max_bytes_before_remerge(max_bytes_before_remerge_) + , remerge_lowered_memory_bytes_ratio(remerge_lowered_memory_bytes_ratio_) + , max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_volume(tmp_volume_) + , min_free_disk_space(min_free_disk_space_) +{ + /// TODO: check input_stream is partially sorted by the same description. + output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +SortingStep::SortingStep( + const DataStream & input_stream_, + SortDescription prefix_description_, + SortDescription result_description_, + size_t max_block_size_, + UInt64 limit_) + : ITransformingStep(input_stream_, input_stream_.header, getTraits(limit_)) + , type(Type::FinishSorting) + , prefix_description(std::move(prefix_description_)) + , result_description(std::move(result_description_)) + , max_block_size(max_block_size_) + , limit(limit_) +{ + /// TODO: check input_stream is sorted by prefix_description. + output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +SortingStep::SortingStep( + const DataStream & input_stream, + SortDescription sort_description_, + size_t max_block_size_, + UInt64 limit_) + : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) + , type(Type::MergingSorted) + , result_description(std::move(sort_description_)) + , max_block_size(max_block_size_) + , limit(limit_) +{ + /// TODO: check input_stream is partially sorted (each port) by the same description. + output_stream->sort_description = result_description; + output_stream->sort_mode = DataStream::SortMode::Stream; +} + +void SortingStep::updateLimit(size_t limit_) +{ + if (limit_ && (limit == 0 || limit_ < limit)) + { + limit = limit_; + transform_traits.preserves_number_of_rows = false; + } +} + +void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + if (type == Type::FinishSorting) + { + bool need_finish_sorting = (prefix_description.size() < result_description.size()); + if (pipeline.getNumStreams() > 1) + { + UInt64 limit_for_merging = (need_finish_sorting ? 
0 : limit); + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + prefix_description, + max_block_size, + limit_for_merging); + + pipeline.addTransform(std::move(transform)); + } + + if (need_finish_sorting) + { + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, result_description, limit); + }); + + /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform + pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr + { + return std::make_shared( + header, prefix_description, result_description, max_block_size, limit); + }); + } + } + else if (type == Type::Full) + { + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + return std::make_shared(header, result_description, limit); + }); + + StreamLocalLimits limits; + limits.mode = LimitsMode::LIMITS_CURRENT; //-V1048 + limits.size_limits = size_limits; + + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type != QueryPipelineBuilder::StreamType::Main) + return nullptr; + + auto transform = std::make_shared(header, limits); + return transform; + }); + + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr + { + if (stream_type == QueryPipelineBuilder::StreamType::Totals) + return nullptr; + + return std::make_shared( + header, result_description, max_block_size, limit, + max_bytes_before_remerge / pipeline.getNumStreams(), + remerge_lowered_memory_bytes_ratio, + max_bytes_before_external_sort, + tmp_volume, + min_free_disk_space); + }); + + /// If there are several streams, then we merge them into one + if (pipeline.getNumStreams() > 1) + { + + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + result_description, + max_block_size, limit); + + pipeline.addTransform(std::move(transform)); + } + } + else if (type == Type::MergingSorted) + { /// If there are several streams, then we merge them into one + if (pipeline.getNumStreams() > 1) + { + + auto transform = std::make_shared( + pipeline.getHeader(), + pipeline.getNumStreams(), + result_description, + max_block_size, limit); + + pipeline.addTransform(std::move(transform)); + } + } +} + +void SortingStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + + if (!prefix_description.empty()) + { + settings.out << prefix << "Prefix sort description: "; + dumpSortDescription(prefix_description, input_streams.front().header, settings.out); + settings.out << '\n'; + + settings.out << prefix << "Result sort description: "; + dumpSortDescription(result_description, input_streams.front().header, settings.out); + settings.out << '\n'; + } + else + { + settings.out << prefix << "Sort description: "; + dumpSortDescription(result_description, input_streams.front().header, settings.out); + settings.out << '\n'; + } + + if (limit) + settings.out << prefix << "Limit " << limit << '\n'; +} + +void SortingStep::describeActions(JSONBuilder::JSONMap & map) const +{ + if (!prefix_description.empty()) + { + map.add("Prefix Sort Description", explainSortDescription(prefix_description, 
input_streams.front().header)); + map.add("Result Sort Description", explainSortDescription(result_description, input_streams.front().header)); + } + else + map.add("Sort Description", explainSortDescription(result_description, input_streams.front().header)); + + if (limit) + map.add("Limit", limit); +} + +} diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h new file mode 100644 index 00000000000..8e253e71f44 --- /dev/null +++ b/src/Processors/QueryPlan/SortingStep.h @@ -0,0 +1,76 @@ +#pragma once +#include +#include +#include +#include + +namespace DB +{ + +/// Sort data stream +class SortingStep : public ITransformingStep +{ +public: + /// Full + SortingStep( + const DataStream & input_stream, + const SortDescription & description_, + size_t max_block_size_, + UInt64 limit_, + SizeLimits size_limits_, + size_t max_bytes_before_remerge_, + double remerge_lowered_memory_bytes_ratio_, + size_t max_bytes_before_external_sort_, + VolumePtr tmp_volume_, + size_t min_free_disk_space_); + + /// FinishSorting + SortingStep( + const DataStream & input_stream_, + SortDescription prefix_description_, + SortDescription result_description_, + size_t max_block_size_, + UInt64 limit_); + + /// MergingSorted + SortingStep( + const DataStream & input_stream, + SortDescription sort_description_, + size_t max_block_size_, + UInt64 limit_ = 0); + + String getName() const override { return "Sorting"; } + + void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + void describeActions(JSONBuilder::JSONMap & map) const override; + void describeActions(FormatSettings & settings) const override; + + /// Add limit or change it to lower value. + void updateLimit(size_t limit_); + +private: + + enum class Type + { + Full, + FinishSorting, + MergingSorted, + }; + + Type type; + + SortDescription prefix_description; + SortDescription result_description; + size_t max_block_size; + UInt64 limit; + SizeLimits size_limits; + + size_t max_bytes_before_remerge = 0; + double remerge_lowered_memory_bytes_ratio = 0; + size_t max_bytes_before_external_sort = 0; + VolumePtr tmp_volume; + size_t min_free_disk_space = 0; +}; + +} diff --git a/src/Processors/QueryPlan/TotalsHavingStep.cpp b/src/Processors/QueryPlan/TotalsHavingStep.cpp index db14950b229..be2cd2348a4 100644 --- a/src/Processors/QueryPlan/TotalsHavingStep.cpp +++ b/src/Processors/QueryPlan/TotalsHavingStep.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/UnionStep.cpp b/src/Processors/QueryPlan/UnionStep.cpp index 85705595479..5d40a9e241e 100644 --- a/src/Processors/QueryPlan/UnionStep.cpp +++ b/src/Processors/QueryPlan/UnionStep.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index ca09f4a9474..cd4bb5f6730 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h new file mode 100644 index 00000000000..30cf958c072 --- /dev/null +++ b/src/Processors/Sinks/RemoteSink.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace DB +{ + +class RemoteSink final : public RemoteInserter, public SinkToStorage +{ +public: + explicit RemoteSink( + Connection & connection_, + const 
ConnectionTimeouts & timeouts, + const String & query_, + const Settings & settings_, + const ClientInfo & client_info_) + : RemoteInserter(connection_, timeouts, query_, settings_, client_info_) + , SinkToStorage(RemoteInserter::getHeader()) + { + } + + String getName() const override { return "RemoteSink"; } + void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void onFinish() override { RemoteInserter::onFinish(); } +}; + +} diff --git a/src/Processors/Sources/DelayedSource.h b/src/Processors/Sources/DelayedSource.h index 7c2b104b61e..3d17c13ad4c 100644 --- a/src/Processors/Sources/DelayedSource.h +++ b/src/Processors/Sources/DelayedSource.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/Formats/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp similarity index 99% rename from src/Formats/MySQLSource.cpp rename to src/Processors/Sources/MySQLSource.cpp index 069aa0cb56b..5bda662466c 100644 --- a/src/Formats/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -19,7 +19,7 @@ #include #include #include -#include "MySQLSource.h" +#include namespace DB diff --git a/src/Formats/MySQLSource.h b/src/Processors/Sources/MySQLSource.h similarity index 100% rename from src/Formats/MySQLSource.h rename to src/Processors/Sources/MySQLSource.h diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index bf3ef32214d..99ba459cf2c 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include @@ -56,7 +56,7 @@ std::optional RemoteSource::tryGenerate() query_executor->setProgressCallback([this](const Progress & value) { progress(value); }); /// Get rows_before_limit result for remote query from ProfileInfo packet. 
- query_executor->setProfileInfoCallback([this](const BlockStreamProfileInfo & info) + query_executor->setProfileInfoCallback([this](const ProfileInfo & info) { if (rows_before_limit && info.hasAppliedLimit()) rows_before_limit->set(info.getRowsBeforeLimit()); diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 2109cb5eba4..23c3ddec401 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/DataStreams/SQLiteSource.cpp b/src/Processors/Sources/SQLiteSource.cpp similarity index 100% rename from src/DataStreams/SQLiteSource.cpp rename to src/Processors/Sources/SQLiteSource.cpp diff --git a/src/DataStreams/SQLiteSource.h b/src/Processors/Sources/SQLiteSource.h similarity index 100% rename from src/DataStreams/SQLiteSource.h rename to src/Processors/Sources/SQLiteSource.h diff --git a/src/DataStreams/ShellCommandSource.h b/src/Processors/Sources/ShellCommandSource.h similarity index 99% rename from src/DataStreams/ShellCommandSource.h rename to src/Processors/Sources/ShellCommandSource.h index 18dbd2e03aa..4974c33f290 100644 --- a/src/DataStreams/ShellCommandSource.h +++ b/src/Processors/Sources/ShellCommandSource.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include diff --git a/src/Processors/Sources/SourceFromInputStream.cpp b/src/Processors/Sources/SourceFromInputStream.cpp deleted file mode 100644 index 57e449370a5..00000000000 --- a/src/Processors/Sources/SourceFromInputStream.cpp +++ /dev/null @@ -1,195 +0,0 @@ -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_) - : ISourceWithProgress(stream_->getHeader()) - , force_add_aggregating_info(force_add_aggregating_info_) - , stream(std::move(stream_)) -{ - init(); -} - -void SourceFromInputStream::init() -{ - const auto & sample = getPort().getHeader(); - for (auto & type : sample.getDataTypes()) - if (typeid_cast(type.get())) - has_aggregate_functions = true; -} - -void SourceFromInputStream::addTotalsPort() -{ - if (totals_port) - throw Exception("Totals port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - totals_port = &outputs.back(); -} - -void SourceFromInputStream::addExtremesPort() -{ - if (extremes_port) - throw Exception("Extremes port was already added for SourceFromInputStream.", ErrorCodes::LOGICAL_ERROR); - - outputs.emplace_back(outputs.front().getHeader(), this); - extremes_port = &outputs.back(); -} - -IProcessor::Status SourceFromInputStream::prepare() -{ - auto status = ISource::prepare(); - - if (status == Status::Finished) - { - is_generating_finished = true; - - /// Read postfix and get totals if needed. 
- if (!is_stream_finished && !isCancelled()) - return Status::Ready; - - if (totals_port && !totals_port->isFinished()) - { - if (has_totals) - { - if (!totals_port->canPush()) - return Status::PortFull; - - totals_port->push(std::move(totals)); - has_totals = false; - } - - totals_port->finish(); - } - - if (extremes_port && !extremes_port->isFinished()) - { - if (has_extremes) - { - if (!extremes_port->canPush()) - return Status::PortFull; - - extremes_port->push(std::move(extremes)); - has_extremes = false; - } - - extremes_port->finish(); - } - } - - return status; -} - -void SourceFromInputStream::work() -{ - if (!is_generating_finished) - { - try - { - ISource::work(); - } - catch (...) - { - /// Won't read suffix in case of exception. - is_stream_finished = true; - throw; - } - - return; - } - - if (is_stream_finished) - return; - - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - totals.setColumns(totals_block.getColumns(), 1); - has_totals = true; - } - - is_stream_finished = true; -} - -Chunk SourceFromInputStream::generate() -{ - if (is_stream_finished) - return {}; - - if (!is_stream_started) - { - stream->readPrefix(); - is_stream_started = true; - } - - auto block = stream->read(); - if (!block && !isCancelled()) - { - if (rows_before_limit) - { - const auto & info = stream->getProfileInfo(); - if (info.hasAppliedLimit()) - rows_before_limit->add(info.getRowsBeforeLimit()); - } - - stream->readSuffix(); - - if (auto totals_block = stream->getTotals()) - { - if (totals_block.rows() > 0) /// Sometimes we can get empty totals. Skip it. - { - totals.setColumns(totals_block.getColumns(), totals_block.rows()); - has_totals = true; - } - } - - if (auto extremes_block = stream->getExtremes()) - { - if (extremes_block.rows() > 0) /// Sometimes we can get empty extremes. Skip it. - { - extremes.setColumns(extremes_block.getColumns(), extremes_block.rows()); - has_extremes = true; - } - } - - is_stream_finished = true; - return {}; - } - - if (isCancelled()) - return {}; - -#ifndef NDEBUG - assertBlocksHaveEqualStructure(getPort().getHeader(), block, "SourceFromInputStream"); -#endif - - UInt64 num_rows = block.rows(); - Chunk chunk(block.getColumns(), num_rows); - - if (force_add_aggregating_info || has_aggregate_functions) - { - auto info = std::make_shared(); - info->bucket_num = block.info.bucket_num; - info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); - } - - return chunk; -} - -} diff --git a/src/Processors/Sources/SourceFromInputStream.h b/src/Processors/Sources/SourceFromInputStream.h deleted file mode 100644 index 9649385909c..00000000000 --- a/src/Processors/Sources/SourceFromInputStream.h +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once - -#include -#include -#include - - -namespace DB -{ - -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - -/// Wrapper for IBlockInputStream which implements ISourceWithProgress. -class SourceFromInputStream : public ISourceWithProgress -{ -public: - /// If force_add_aggregating_info is enabled, AggregatedChunkInfo (with bucket number and is_overflows flag) will be added to result chunk. 
- explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false); - String getName() const override { return "SourceFromInputStream"; } - - Status prepare() override; - void work() override; - - Chunk generate() override; - - BlockInputStreamPtr & getStream() { return stream; } - - void addTotalsPort(); - void addExtremesPort(); - - OutputPort * getTotalsPort() const { return totals_port; } - OutputPort * getExtremesPort() const { return extremes_port; } - - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } - - /// Implementation for methods from ISourceWithProgress. - void setLimits(const StreamLocalLimits & limits_) final { stream->setLimits(limits_); } - void setLeafLimits(const SizeLimits &) final { } - void setQuota(const std::shared_ptr & quota_) final { stream->setQuota(quota_); } - void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); } - void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); } - void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); } - - /// Stop reading from stream if output port is finished. - void onUpdatePorts() override - { - if (getPort().isFinished()) - cancel(); - } - -protected: - void onCancel() override { stream->cancel(false); } - -private: - bool has_aggregate_functions = false; - bool force_add_aggregating_info = false; - BlockInputStreamPtr stream; - - RowsBeforeLimitCounterPtr rows_before_limit; - - Chunk totals; - OutputPort * totals_port = nullptr; - bool has_totals = false; - - Chunk extremes; - OutputPort * extremes_port = nullptr; - bool has_extremes = false; - - bool is_generating_finished = false; - bool is_stream_finished = false; - bool is_stream_started = false; - - void init(); -}; - -} diff --git a/src/Processors/Sources/SourceWithProgress.cpp b/src/Processors/Sources/SourceWithProgress.cpp index 15d64dee3ee..0ebdd968997 100644 --- a/src/Processors/Sources/SourceWithProgress.cpp +++ b/src/Processors/Sources/SourceWithProgress.cpp @@ -69,8 +69,7 @@ void SourceWithProgress::work() } } -/// Aggregated copy-paste from IBlockInputStream::progressImpl. -/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream. +/// TODO: Most of this must be done in PipelineExecutor outside. void SourceWithProgress::progress(const Progress & value) { was_progress_called = true; @@ -135,14 +134,12 @@ void SourceWithProgress::progress(const Progress & value) if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds) { - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. CurrentThread::updatePerformanceCounters(); last_profile_events_update_time = total_elapsed_microseconds; } - /// Should be done in PipelineExecutor. - /// It is here for compatibility with IBlockInputsStream. + /// TODO: Should be done in PipelineExecutor. 
limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota && limits.mode == LimitsMode::LIMITS_TOTAL) diff --git a/src/Processors/Sources/SourceWithProgress.h b/src/Processors/Sources/SourceWithProgress.h index bf57c3b013b..912a548f977 100644 --- a/src/Processors/Sources/SourceWithProgress.h +++ b/src/Processors/Sources/SourceWithProgress.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include diff --git a/src/Processors/Sources/TemporaryFileLazySource.cpp b/src/Processors/Sources/TemporaryFileLazySource.cpp new file mode 100644 index 00000000000..0382229a7c0 --- /dev/null +++ b/src/Processors/Sources/TemporaryFileLazySource.cpp @@ -0,0 +1,32 @@ +#include +#include + +namespace DB +{ + +TemporaryFileLazySource::~TemporaryFileLazySource() = default; + +TemporaryFileLazySource::TemporaryFileLazySource(const std::string & path_, const Block & header_) + : ISource(header_) + , path(path_) + , done(false) +{} + +Chunk TemporaryFileLazySource::generate() +{ + if (done) + return {}; + + if (!stream) + stream = std::make_unique(path, header); + + auto block = stream->block_in->read(); + if (!block) + { + done = true; + stream.reset(); + } + return Chunk(block.getColumns(), block.rows()); +} + +} diff --git a/src/Processors/Sources/TemporaryFileLazySource.h b/src/Processors/Sources/TemporaryFileLazySource.h new file mode 100644 index 00000000000..b2e9d5d5500 --- /dev/null +++ b/src/Processors/Sources/TemporaryFileLazySource.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB +{ + +struct TemporaryFileStream; + +class TemporaryFileLazySource : public ISource +{ +public: + TemporaryFileLazySource(const std::string & path_, const Block & header_); + ~TemporaryFileLazySource() override; + String getName() const override { return "TemporaryFileLazySource"; } + +protected: + Chunk generate() override; + +private: + const std::string path; + Block header; + bool done; + + std::unique_ptr stream; +}; + +} diff --git a/src/DataStreams/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp similarity index 98% rename from src/DataStreams/ITTLAlgorithm.cpp rename to src/Processors/TTL/ITTLAlgorithm.cpp index 7513e0c6ce0..489941950b5 100644 --- a/src/DataStreams/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ITTLAlgorithm.h b/src/Processors/TTL/ITTLAlgorithm.h similarity index 100% rename from src/DataStreams/ITTLAlgorithm.h rename to src/Processors/TTL/ITTLAlgorithm.h diff --git a/src/DataStreams/TTLAggregationAlgorithm.cpp b/src/Processors/TTL/TTLAggregationAlgorithm.cpp similarity index 99% rename from src/DataStreams/TTLAggregationAlgorithm.cpp rename to src/Processors/TTL/TTLAggregationAlgorithm.cpp index d012464ea14..5581892f16f 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.cpp +++ b/src/Processors/TTL/TTLAggregationAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLAggregationAlgorithm.h b/src/Processors/TTL/TTLAggregationAlgorithm.h similarity index 96% rename from src/DataStreams/TTLAggregationAlgorithm.h rename to src/Processors/TTL/TTLAggregationAlgorithm.h index c2f40bab6b9..0e4bf092ed6 100644 --- a/src/DataStreams/TTLAggregationAlgorithm.h +++ b/src/Processors/TTL/TTLAggregationAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include diff --git a/src/DataStreams/TTLColumnAlgorithm.cpp b/src/Processors/TTL/TTLColumnAlgorithm.cpp 
similarity index 98% rename from src/DataStreams/TTLColumnAlgorithm.cpp rename to src/Processors/TTL/TTLColumnAlgorithm.cpp index 71ad2a4e38f..7cef77c185c 100644 --- a/src/DataStreams/TTLColumnAlgorithm.cpp +++ b/src/Processors/TTL/TTLColumnAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLColumnAlgorithm.h b/src/Processors/TTL/TTLColumnAlgorithm.h similarity index 95% rename from src/DataStreams/TTLColumnAlgorithm.h rename to src/Processors/TTL/TTLColumnAlgorithm.h index ddf963eaee2..30de77dcc2a 100644 --- a/src/DataStreams/TTLColumnAlgorithm.h +++ b/src/Processors/TTL/TTLColumnAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/DataStreams/TTLDeleteAlgorithm.cpp b/src/Processors/TTL/TTLDeleteAlgorithm.cpp similarity index 97% rename from src/DataStreams/TTLDeleteAlgorithm.cpp rename to src/Processors/TTL/TTLDeleteAlgorithm.cpp index ea7a0b235ec..eec6b21f949 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.cpp +++ b/src/Processors/TTL/TTLDeleteAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLDeleteAlgorithm.h b/src/Processors/TTL/TTLDeleteAlgorithm.h similarity index 92% rename from src/DataStreams/TTLDeleteAlgorithm.h rename to src/Processors/TTL/TTLDeleteAlgorithm.h index 8ab3f8b63e8..292a29bfa27 100644 --- a/src/DataStreams/TTLDeleteAlgorithm.h +++ b/src/Processors/TTL/TTLDeleteAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp similarity index 97% rename from src/DataStreams/TTLUpdateInfoAlgorithm.cpp rename to src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index 6a983d052c1..eba364aa2b8 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -1,4 +1,4 @@ -#include +#include namespace DB { diff --git a/src/DataStreams/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h similarity index 94% rename from src/DataStreams/TTLUpdateInfoAlgorithm.h rename to src/Processors/TTL/TTLUpdateInfoAlgorithm.h index 551211fc47f..45eecbde3d0 100644 --- a/src/DataStreams/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -1,6 +1,6 @@ #pragma once -#include +#include namespace DB { diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 9011d188b81..8357a997960 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -1,10 +1,9 @@ #include -#include +#include #include -#include +#include #include -#include #include namespace ProfileEvents diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 1639bc4df4b..2a515fdf3be 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -24,9 +24,6 @@ public: Int32 bucket_num = -1; }; -class IBlockInputStream; -using BlockInputStreamPtr = std::shared_ptr; - using AggregatorList = std::list; using AggregatorListPtr = std::shared_ptr; diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index ddb8a5a0d68..d0cb4975290 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -127,7 +127,7 @@ 
ColumnGathererTransform::ColumnGathererTransform( ReadBuffer & row_sources_buf_, size_t block_preferred_size_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*has_limit_below_one_block_=*/ false, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, num_inputs, row_sources_buf_, block_preferred_size_) , log(&Poco::Logger::get("ColumnGathererStream")) { diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index 6776caae9bf..fb3c8d6a87b 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -1,14 +1,12 @@ #include #include #include -#include #include #include #include #include -#include namespace DB diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index eca12c33f54..839ab0cac88 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -1,12 +1,11 @@ #pragma once -#include -#include +#include #include #include #include -#include -#include +#include +#include #include #include diff --git a/src/Processors/Transforms/DistinctTransform.h b/src/Processors/Transforms/DistinctTransform.h index 236f9026c63..d80fdb5bc22 100644 --- a/src/Processors/Transforms/DistinctTransform.h +++ b/src/Processors/Transforms/DistinctTransform.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 9de5cbf5125..50891ece654 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -1,10 +1,10 @@ #pragma once #include -#include +#include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index f13d5376ebe..abf416e8047 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index 6e379a3c4ba..73817d7de4a 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include #include @@ -197,7 +197,6 @@ void MergeSortingTransform::consume(Chunk chunk) description, max_merged_block_size, limit, - false, nullptr, quiet, use_average_block_sizes, diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index df2ea4b03f0..7f0893a6c0d 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Processors/Transforms/SortingTransform.cpp b/src/Processors/Transforms/SortingTransform.cpp index 2c9098adaa6..eeb576731ab 100644 --- a/src/Processors/Transforms/SortingTransform.cpp +++ b/src/Processors/Transforms/SortingTransform.cpp @@ -9,8 +9,8 @@ #include #include -#include -#include +#include +#include namespace 
ProfileEvents diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 75a799e5af1..bf4a051891b 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -1,6 +1,7 @@ #pragma once + #include -#include +#include namespace DB { diff --git a/src/DataStreams/TTLCalcInputStream.cpp b/src/Processors/Transforms/TTLCalcTransform.cpp similarity index 97% rename from src/DataStreams/TTLCalcInputStream.cpp rename to src/Processors/Transforms/TTLCalcTransform.cpp index fe85e40c003..31fb61239ef 100644 --- a/src/DataStreams/TTLCalcInputStream.cpp +++ b/src/Processors/Transforms/TTLCalcTransform.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include namespace DB { diff --git a/src/DataStreams/TTLCalcInputStream.h b/src/Processors/Transforms/TTLCalcTransform.h similarity index 96% rename from src/DataStreams/TTLCalcInputStream.h rename to src/Processors/Transforms/TTLCalcTransform.h index b6318026b8c..14592c07155 100644 --- a/src/DataStreams/TTLCalcInputStream.h +++ b/src/Processors/Transforms/TTLCalcTransform.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/Processors/Transforms/TTLTransform.cpp similarity index 95% rename from src/DataStreams/TTLBlockInputStream.cpp rename to src/Processors/Transforms/TTLTransform.cpp index b476f689e60..a515a50fafb 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -8,10 +8,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB { diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/Processors/Transforms/TTLTransform.h similarity index 92% rename from src/DataStreams/TTLBlockInputStream.h rename to src/Processors/Transforms/TTLTransform.h index 50b28e81bdf..9207c68448b 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/Processors/Transforms/TTLTransform.h @@ -4,8 +4,8 @@ #include #include #include -#include -#include +#include +#include #include diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index f30058fadb9..0b7797da24f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include namespace DB @@ -29,6 +29,25 @@ void finalizeChunk(Chunk & chunk) chunk.setColumns(std::move(columns), num_rows); } +void finalizeBlock(Block & block) +{ + for (size_t i = 0; i < block.columns(); ++i) + { + ColumnWithTypeAndName & current = block.getByPosition(i); + const DataTypeAggregateFunction * unfinalized_type = typeid_cast(current.type.get()); + + if (unfinalized_type) + { + current.type = unfinalized_type->getReturnType(); + if (current.column) + { + auto mut_column = IColumn::mutate(std::move(current.column)); + current.column = ColumnAggregateFunction::convertToValues(std::move(mut_column)); + } + } + } +} + Block TotalsHavingTransform::transformHeader( Block block, const ActionsDAG * expression, diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h index 75d0528ff7b..260fdfb3a19 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.h +++ b/src/Processors/Transforms/buildPushingToViewsChain.h @@ -1,9 
+1,8 @@ #pragma once -#include #include #include -#include +#include #include #include #include diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index a1cce03a1a5..86998614189 100644 --- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -5,12 +5,12 @@ #include #include #include -#include +#include #include #include #include #include -#include +#include #include #include "IO/CompressionMethod.h" #include "Parsers/ASTLiteral.h" diff --git a/src/DataStreams/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp similarity index 92% rename from src/DataStreams/BlockIO.cpp rename to src/QueryPipeline/BlockIO.cpp index 5f1abdaf806..671ba6e4c39 100644 --- a/src/DataStreams/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -1,6 +1,5 @@ -#include +#include #include -#include namespace DB { diff --git a/src/DataStreams/BlockIO.h b/src/QueryPipeline/BlockIO.h similarity index 96% rename from src/DataStreams/BlockIO.h rename to src/QueryPipeline/BlockIO.h index d699d525f2f..0f05beca4a8 100644 --- a/src/DataStreams/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/DataStreams/CMakeLists.txt b/src/QueryPipeline/CMakeLists.txt similarity index 100% rename from src/DataStreams/CMakeLists.txt rename to src/QueryPipeline/CMakeLists.txt diff --git a/src/Processors/Chain.cpp b/src/QueryPipeline/Chain.cpp similarity index 99% rename from src/Processors/Chain.cpp rename to src/QueryPipeline/Chain.cpp index 5e3b2e6a678..ca7b0b80692 100644 --- a/src/Processors/Chain.cpp +++ b/src/QueryPipeline/Chain.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { diff --git a/src/Processors/Chain.h b/src/QueryPipeline/Chain.h similarity index 97% rename from src/Processors/Chain.h rename to src/QueryPipeline/Chain.h index da5167f9c7a..c5fdc34cecf 100644 --- a/src/Processors/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB { diff --git a/src/DataStreams/ConnectionCollector.cpp b/src/QueryPipeline/ConnectionCollector.cpp similarity index 98% rename from src/DataStreams/ConnectionCollector.cpp rename to src/QueryPipeline/ConnectionCollector.cpp index df206478e91..a6a0afb68d3 100644 --- a/src/DataStreams/ConnectionCollector.cpp +++ b/src/QueryPipeline/ConnectionCollector.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ConnectionCollector.h b/src/QueryPipeline/ConnectionCollector.h similarity index 100% rename from src/DataStreams/ConnectionCollector.h rename to src/QueryPipeline/ConnectionCollector.h diff --git a/src/DataStreams/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp similarity index 99% rename from src/DataStreams/ExecutionSpeedLimits.cpp rename to src/QueryPipeline/ExecutionSpeedLimits.cpp index e340ee71ab0..2738903cedb 100644 --- a/src/DataStreams/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/ExecutionSpeedLimits.h b/src/QueryPipeline/ExecutionSpeedLimits.h similarity index 95% rename from src/DataStreams/ExecutionSpeedLimits.h rename to src/QueryPipeline/ExecutionSpeedLimits.h index b8c320bb005..63658462c9f 100644 --- a/src/DataStreams/ExecutionSpeedLimits.h +++ b/src/QueryPipeline/ExecutionSpeedLimits.h @@ -2,7 +2,7 @@ #include #include -#include 
+#include class Stopwatch; diff --git a/src/Processors/Pipe.cpp b/src/QueryPipeline/Pipe.cpp similarity index 98% rename from src/Processors/Pipe.cpp rename to src/QueryPipeline/Pipe.cpp index ec288484ca3..6cef7cc28bd 100644 --- a/src/Processors/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -1,6 +1,5 @@ -#include +#include #include -#include #include #include #include @@ -9,6 +8,7 @@ #include #include #include +#include #include #include @@ -164,13 +164,7 @@ Pipe::Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, Output Pipe::Pipe(ProcessorPtr source) { - if (auto * source_from_input_stream = typeid_cast(source.get())) - { - /// Special case for SourceFromInputStream. Will remove it later. - totals_port = source_from_input_stream->getTotalsPort(); - extremes_port = source_from_input_stream->getExtremesPort(); - } - else if (source->getOutputs().size() != 1) + if (source->getOutputs().size() != 1) checkSource(*source); if (collected_processors) diff --git a/src/Processors/Pipe.h b/src/QueryPipeline/Pipe.h similarity index 98% rename from src/Processors/Pipe.h rename to src/QueryPipeline/Pipe.h index a07c68f56b2..0af02a5e662 100644 --- a/src/Processors/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -1,10 +1,10 @@ #pragma once #include -#include -#include +#include +#include #include -#include +#include namespace DB { diff --git a/src/Processors/PipelineResourcesHolder.cpp b/src/QueryPipeline/PipelineResourcesHolder.cpp similarity index 94% rename from src/Processors/PipelineResourcesHolder.cpp rename to src/QueryPipeline/PipelineResourcesHolder.cpp index 9cb2ea301ad..a4b85ed662b 100644 --- a/src/Processors/PipelineResourcesHolder.cpp +++ b/src/QueryPipeline/PipelineResourcesHolder.cpp @@ -1,4 +1,4 @@ -#include +#include #include namespace DB diff --git a/src/Processors/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h similarity index 100% rename from src/Processors/PipelineResourcesHolder.h rename to src/QueryPipeline/PipelineResourcesHolder.h diff --git a/src/QueryPipeline/ProfileInfo.cpp b/src/QueryPipeline/ProfileInfo.cpp new file mode 100644 index 00000000000..ee0ff8c69bf --- /dev/null +++ b/src/QueryPipeline/ProfileInfo.cpp @@ -0,0 +1,73 @@ +#include + +#include +#include + +#include + +namespace DB +{ + +void ProfileInfo::read(ReadBuffer & in) +{ + readVarUInt(rows, in); + readVarUInt(blocks, in); + readVarUInt(bytes, in); + readBinary(applied_limit, in); + readVarUInt(rows_before_limit, in); + readBinary(calculated_rows_before_limit, in); +} + + +void ProfileInfo::write(WriteBuffer & out) const +{ + writeVarUInt(rows, out); + writeVarUInt(blocks, out); + writeVarUInt(bytes, out); + writeBinary(hasAppliedLimit(), out); + writeVarUInt(getRowsBeforeLimit(), out); + writeBinary(calculated_rows_before_limit, out); +} + + +void ProfileInfo::setFrom(const ProfileInfo & rhs, bool skip_block_size_info) +{ + if (!skip_block_size_info) + { + rows = rhs.rows; + blocks = rhs.blocks; + bytes = rhs.bytes; + } + applied_limit = rhs.applied_limit; + rows_before_limit = rhs.rows_before_limit; + calculated_rows_before_limit = rhs.calculated_rows_before_limit; +} + + +size_t ProfileInfo::getRowsBeforeLimit() const +{ + calculated_rows_before_limit = true; + return rows_before_limit; +} + + +bool ProfileInfo::hasAppliedLimit() const +{ + calculated_rows_before_limit = true; + return applied_limit; +} + + +void ProfileInfo::update(Block & block) +{ + update(block.rows(), block.bytes()); +} + +void ProfileInfo::update(size_t num_rows, size_t num_bytes) +{ + ++blocks; + rows 
+= num_rows; + bytes += num_bytes; +} + +} diff --git a/src/DataStreams/BlockStreamProfileInfo.h b/src/QueryPipeline/ProfileInfo.h similarity index 72% rename from src/DataStreams/BlockStreamProfileInfo.h rename to src/QueryPipeline/ProfileInfo.h index 688bdfc91fc..0a5800cd409 100644 --- a/src/DataStreams/BlockStreamProfileInfo.h +++ b/src/QueryPipeline/ProfileInfo.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -13,12 +12,9 @@ class Block; class ReadBuffer; class WriteBuffer; -/// Information for profiling. See IBlockInputStream.h -struct BlockStreamProfileInfo +/// Information for profiling. See SourceWithProgress.h +struct ProfileInfo { - /// Info about stream object this profile info refers to. - IBlockInputStream * parent = nullptr; - bool started = false; Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time @@ -26,10 +22,7 @@ struct BlockStreamProfileInfo size_t blocks = 0; size_t bytes = 0; - using BlockStreamProfileInfos = std::vector; - - /// Collect BlockStreamProfileInfo for the nearest sources in the tree named `name`. Example; collect all info for PartialSorting streams. - void collectInfosForStreamsWithName(const char * name, BlockStreamProfileInfos & res) const; + using ProfileInfos = std::vector; /** Get the number of rows if there were no LIMIT. * If there is no LIMIT, 0 is returned. @@ -49,7 +42,7 @@ struct BlockStreamProfileInfo /// Sets main fields from other object (see methods above). /// If skip_block_size_info if true, then rows, bytes and block fields are ignored. - void setFrom(const BlockStreamProfileInfo & rhs, bool skip_block_size_info); + void setFrom(const ProfileInfo & rhs, bool skip_block_size_info); /// Only for Processors. void setRowsBeforeLimit(size_t rows_before_limit_) @@ -59,8 +52,6 @@ struct BlockStreamProfileInfo } private: - void calculateRowsBeforeLimit() const; - /// For these fields we make accessors, because they must be calculated beforehand. mutable bool applied_limit = false; /// Whether LIMIT was applied mutable size_t rows_before_limit = 0; diff --git a/src/Processors/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp similarity index 96% rename from src/Processors/QueryPipeline.cpp rename to src/QueryPipeline/QueryPipeline.cpp index a76f99c9e00..ce1c9473f60 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -1,16 +1,15 @@ #include -#include +#include #include #include #include -#include -#include +#include +#include #include #include #include #include #include -#include #include #include #include @@ -121,7 +120,6 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
std::vector limits; - std::vector sources; std::vector remote_sources; std::unordered_set visited; @@ -151,9 +149,6 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) limits.emplace_back(limit); } - if (auto * source = typeid_cast(processor)) - sources.emplace_back(source); - if (auto * source = typeid_cast(processor)) remote_sources.emplace_back(source); } @@ -186,16 +181,13 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); for (auto & limit : limits) limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : remote_sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } diff --git a/src/Processors/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h similarity index 98% rename from src/Processors/QueryPipeline.h rename to src/QueryPipeline/QueryPipeline.h index 42fdb429a14..beb46361f95 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include namespace DB diff --git a/src/Processors/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp similarity index 97% rename from src/Processors/QueryPipelineBuilder.cpp rename to src/QueryPipeline/QueryPipelineBuilder.cpp index 8ed413166da..40c64046560 100644 --- a/src/Processors/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -468,7 +467,6 @@ void QueryPipelineBuilder::initRowsBeforeLimit() /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
std::vector limits; - std::vector sources; std::vector remote_sources; std::unordered_set visited; @@ -498,9 +496,6 @@ void QueryPipelineBuilder::initRowsBeforeLimit() limits.emplace_back(limit); } - if (auto * source = typeid_cast(processor)) - sources.emplace_back(source); - if (auto * source = typeid_cast(processor)) remote_sources.emplace_back(source); } @@ -533,16 +528,13 @@ void QueryPipelineBuilder::initRowsBeforeLimit() } } - if (!rows_before_limit_at_least && (!limits.empty() || !sources.empty() || !remote_sources.empty())) + if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) { rows_before_limit_at_least = std::make_shared(); for (auto & limit : limits) limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); - for (auto & source : remote_sources) source->setRowsBeforeLimitCounter(rows_before_limit_at_least); } diff --git a/src/Processors/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h similarity index 97% rename from src/Processors/QueryPipelineBuilder.h rename to src/QueryPipeline/QueryPipelineBuilder.h index 78ae5dd41be..12f74805173 100644 --- a/src/Processors/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include -#include -#include +#include +#include #include #include @@ -130,7 +129,6 @@ public: void setLeafLimits(const SizeLimits & limits) { pipe.setLeafLimits(limits); } void setQuota(const std::shared_ptr & quota) { pipe.setQuota(quota); } - /// For compatibility with IBlockInputStream. void setProgressCallback(const ProgressCallback & callback); void setProcessListElement(QueryStatus * elem); diff --git a/src/DataStreams/RemoteBlockOutputStream.cpp b/src/QueryPipeline/RemoteInserter.cpp similarity index 98% rename from src/DataStreams/RemoteBlockOutputStream.cpp rename to src/QueryPipeline/RemoteInserter.cpp index 7642098ff0c..c34c625dc6d 100644 --- a/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/DataStreams/RemoteBlockOutputStream.h b/src/QueryPipeline/RemoteInserter.h similarity index 56% rename from src/DataStreams/RemoteBlockOutputStream.h rename to src/QueryPipeline/RemoteInserter.h index f1f49015c9d..0688b555825 100644 --- a/src/DataStreams/RemoteBlockOutputStream.h +++ b/src/QueryPipeline/RemoteInserter.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -44,23 +43,4 @@ private: bool finished = false; }; -class RemoteSink final : public RemoteInserter, public SinkToStorage -{ -public: - explicit RemoteSink( - Connection & connection_, - const ConnectionTimeouts & timeouts, - const String & query_, - const Settings & settings_, - const ClientInfo & client_info_) - : RemoteInserter(connection_, timeouts, query_, settings_, client_info_) - , SinkToStorage(RemoteInserter::getHeader()) - { - } - - String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } - void onFinish() override { RemoteInserter::onFinish(); } -}; - } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp similarity index 99% rename from src/DataStreams/RemoteQueryExecutor.cpp rename to src/QueryPipeline/RemoteQueryExecutor.cpp index b6a5e6f63d0..b01ed7ba9a2 100644 --- 
a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -1,13 +1,13 @@ #include -#include -#include -#include +#include +#include +#include #include #include #include "Core/Protocol.h" -#include +#include #include #include #include diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h similarity index 98% rename from src/DataStreams/RemoteQueryExecutor.h rename to src/QueryPipeline/RemoteQueryExecutor.h index d82f9983894..b7a2509ea97 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -21,8 +21,8 @@ using ThrottlerPtr = std::shared_ptr; struct Progress; using ProgressCallback = std::function; -struct BlockStreamProfileInfo; -using ProfileInfoCallback = std::function; +struct ProfileInfo; +using ProfileInfoCallback = std::function; class RemoteQueryExecutorReadContext; diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp similarity index 99% rename from src/DataStreams/RemoteQueryExecutorReadContext.cpp rename to src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 6bdf52d2831..5f7b5e24967 100644 --- a/src/DataStreams/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -1,6 +1,6 @@ #if defined(OS_LINUX) -#include +#include #include #include #include diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/QueryPipeline/RemoteQueryExecutorReadContext.h similarity index 100% rename from src/DataStreams/RemoteQueryExecutorReadContext.h rename to src/QueryPipeline/RemoteQueryExecutorReadContext.h diff --git a/src/DataStreams/SizeLimits.cpp b/src/QueryPipeline/SizeLimits.cpp similarity index 97% rename from src/DataStreams/SizeLimits.cpp rename to src/QueryPipeline/SizeLimits.cpp index 06dde923e55..90005902f67 100644 --- a/src/DataStreams/SizeLimits.cpp +++ b/src/QueryPipeline/SizeLimits.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/DataStreams/SizeLimits.h b/src/QueryPipeline/SizeLimits.h similarity index 100% rename from src/DataStreams/SizeLimits.h rename to src/QueryPipeline/SizeLimits.h diff --git a/src/DataStreams/StreamLocalLimits.h b/src/QueryPipeline/StreamLocalLimits.h similarity index 91% rename from src/DataStreams/StreamLocalLimits.h rename to src/QueryPipeline/StreamLocalLimits.h index efda6a941cc..7f49a5d0b07 100644 --- a/src/DataStreams/StreamLocalLimits.h +++ b/src/QueryPipeline/StreamLocalLimits.h @@ -1,6 +1,6 @@ #pragma once -#include -#include +#include +#include namespace DB { diff --git a/src/DataStreams/examples/CMakeLists.txt b/src/QueryPipeline/examples/CMakeLists.txt similarity index 100% rename from src/DataStreams/examples/CMakeLists.txt rename to src/QueryPipeline/examples/CMakeLists.txt diff --git a/src/DataStreams/narrowBlockInputStreams.cpp b/src/QueryPipeline/narrowBlockInputStreams.cpp similarity index 97% rename from src/DataStreams/narrowBlockInputStreams.cpp rename to src/QueryPipeline/narrowBlockInputStreams.cpp index 17e44e38293..19bebe4a0bf 100644 --- a/src/DataStreams/narrowBlockInputStreams.cpp +++ b/src/QueryPipeline/narrowBlockInputStreams.cpp @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include "narrowBlockInputStreams.h" diff --git a/src/DataStreams/narrowBlockInputStreams.h b/src/QueryPipeline/narrowBlockInputStreams.h similarity index 100% rename from src/DataStreams/narrowBlockInputStreams.h rename to src/QueryPipeline/narrowBlockInputStreams.h 
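The file moves above and the removal of SourceFromInputStream earlier in this patch converge on one consumption pattern: call sites stop reading from an IBlockInputStream and instead drain a QueryPipeline through PullingPipelineExecutor. A minimal sketch of that pattern, assuming the post-rename include locations; the drainPipeline helper is hypothetical and not part of this diff:

    #include <Core/Block.h>
    #include <QueryPipeline/QueryPipeline.h>
    #include <Processors/Executors/PullingPipelineExecutor.h>

    namespace DB
    {

    /// Sketch only: replaces the old stream->readPrefix()/read()/readSuffix() loop.
    /// pull() fills `block` and returns false once the pipeline is exhausted.
    size_t drainPipeline(QueryPipeline & pipeline)
    {
        PullingPipelineExecutor executor(pipeline);

        size_t total_rows = 0;
        Block block;
        while (executor.pull(block))
            total_rows += block.rows();

        return total_rows;
    }

    }

The gtest_blocks_size_merging_streams and StorageHDFS hunks later in this diff make exactly this substitution at their call sites.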
diff --git a/src/Processors/printPipeline.cpp b/src/QueryPipeline/printPipeline.cpp similarity index 99% rename from src/Processors/printPipeline.cpp rename to src/QueryPipeline/printPipeline.cpp index cbf8cb3a77d..40c88502ed0 100644 --- a/src/Processors/printPipeline.cpp +++ b/src/QueryPipeline/printPipeline.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Processors/printPipeline.h b/src/QueryPipeline/printPipeline.h similarity index 100% rename from src/Processors/printPipeline.h rename to src/QueryPipeline/printPipeline.h diff --git a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp similarity index 83% rename from src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp rename to src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index 71cf41fcbab..fb10601216e 100644 --- a/src/DataStreams/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -4,10 +4,10 @@ #include #include #include -#include +#include #include -#include -#include +#include +#include using namespace DB; @@ -83,20 +83,23 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform)); QueryPipeline pipeline(std::move(pipe)); - pipeline.setNumThreads(1); - auto stream = std::make_shared(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); size_t total_rows = 0; - auto block1 = stream->read(); - auto block2 = stream->read(); - auto block3 = stream->read(); + Block block1; + Block block2; + Block block3; + executor.pull(block1); + executor.pull(block2); + executor.pull(block3); - EXPECT_EQ(stream->read(), Block()); + Block tmp_block; + ASSERT_FALSE(executor.pull(tmp_block)); for (const auto & block : {block1, block2, block3}) total_rows += block.rows(); @@ -127,19 +130,22 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, 0, false, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, 0, nullptr, false, true); pipe.addTransform(std::move(transform)); QueryPipeline pipeline(std::move(pipe)); - pipeline.setNumThreads(1); - auto stream = std::make_shared(std::move(pipeline)); + PullingPipelineExecutor executor(pipeline); - auto block1 = stream->read(); - auto block2 = stream->read(); - auto block3 = stream->read(); + Block block1; + Block block2; + Block block3; + executor.pull(block1); + executor.pull(block2); + executor.pull(block3); - EXPECT_EQ(stream->read(), Block()); + Block tmp_block; + ASSERT_FALSE(executor.pull(tmp_block)); EXPECT_EQ(block1.rows(), (1000 + 1500 + 1400) / 3); EXPECT_EQ(block2.rows(), (1000 + 1500 + 1400) / 3); diff --git a/src/DataStreams/tests/gtest_check_sorted_stream.cpp b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp similarity index 99% rename from src/DataStreams/tests/gtest_check_sorted_stream.cpp rename to src/QueryPipeline/tests/gtest_check_sorted_stream.cpp index 04ccc64fd7c..751f7ef8635 100644 --- a/src/DataStreams/tests/gtest_check_sorted_stream.cpp +++ b/src/QueryPipeline/tests/gtest_check_sorted_stream.cpp @@ -5,8 +5,8 @@ #include #include #include -#include 
-#include +#include +#include #include diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 1ef723cdd20..ba2644e0fba 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -11,8 +11,7 @@ #include #include #include -#include -#include +#include #include #include #include @@ -33,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -598,7 +597,7 @@ namespace void addProgressToResult(); void addTotalsToResult(const Block & totals); void addExtremesToResult(const Block & extremes); - void addProfileInfoToResult(const BlockStreamProfileInfo & info); + void addProfileInfoToResult(const ProfileInfo & info); void addLogsToResult(); void sendResult(); void throwIfFailedToSendResult(); @@ -1398,7 +1397,7 @@ namespace format->doWriteSuffix(); } - void Call::addProfileInfoToResult(const BlockStreamProfileInfo & info) + void Call::addProfileInfoToResult(const ProfileInfo & info) { auto & stats = *result.mutable_stats(); stats.set_rows(info.rows); diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index b19b02f960d..71099c23655 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -286,7 +286,7 @@ void KeeperTCPHandler::runImpl() return; } - if (keeper_dispatcher->hasLeader()) + if (keeper_dispatcher->checkInit() && keeper_dispatcher->hasLeader()) { try { @@ -306,7 +306,8 @@ void KeeperTCPHandler::runImpl() } else { - LOG_WARNING(log, "Ignoring user request, because no alive leader exist"); + String reason = keeper_dispatcher->checkInit() ? "server is not initialized yet" : "no alive leader exists"; + LOG_WARNING(log, "Ignoring user request, because {}", reason); sendHandshake(false); return; } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3720362775c..8f4f04e56c5 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 38185af4247..729cb33371a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -23,13 +23,14 @@ #include #include #include -#include -#include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -787,7 +788,7 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() out->next(); } -void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info) +void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); info.write(*out); @@ -831,12 +832,6 @@ namespace { using namespace ProfileEvents; - enum ProfileEventTypes : int8_t - { - INCREMENT = 1, - GAUGE = 2, - }; - constexpr size_t NAME_COLUMN_INDEX = 4; constexpr size_t VALUE_COLUMN_INDEX = 5; @@ -879,7 +874,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::INCREMENT); + columns[i++]->insert(ProfileEvents::Type::INCREMENT); } } @@ -893,7 +888,7 @@ namespace columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEventTypes::GAUGE); + columns[i++]->insert(ProfileEvents::Type::GAUGE); columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); 
columns[i++]->insert(snapshot.memory_usage); @@ -907,18 +902,11 @@ void TCPHandler::sendProfileEvents() if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_PROFILE_EVENTS) return; - auto profile_event_type = std::make_shared( - DataTypeEnum8::Values - { - { "increment", static_cast(INCREMENT)}, - { "gauge", static_cast(GAUGE)}, - }); - NamesAndTypesList column_names_and_types = { { "host_name", std::make_shared() }, { "current_time", std::make_shared() }, { "thread_id", std::make_shared() }, - { "type", profile_event_type }, + { "type", ProfileEvents::TypeEnum }, { "name", std::make_shared() }, { "value", std::make_shared() }, }; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index b5d7d1f0776..73fc88526a9 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -9,11 +9,10 @@ #include #include #include -#include +#include #include #include -#include -#include +#include #include "IServer.h" @@ -31,7 +30,7 @@ namespace DB class Session; struct Settings; class ColumnsDescription; -struct BlockStreamProfileInfo; +struct ProfileInfo; /// State of query processing. struct QueryState @@ -228,7 +227,7 @@ private: void sendEndOfStream(); void sendPartUUIDs(); void sendReadTaskRequestAssumeLocked(); - void sendProfileInfo(const BlockStreamProfileInfo & info); + void sendProfileInfo(const ProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); void sendProfileEvents(); diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 1d057d1bb10..c5101f162ee 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -205,6 +205,13 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.sample_by = command_ast->sample_by; return command; } + else if (command_ast->type == ASTAlterCommand::REMOVE_SAMPLE_BY) + { + AlterCommand command; + command.ast = command_ast->clone(); + command.type = AlterCommand::REMOVE_SAMPLE_BY; + return command; + } else if (command_ast->type == ASTAlterCommand::ADD_INDEX) { AlterCommand command; @@ -463,6 +470,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { metadata.sampling_key.recalculateWithNewAST(sample_by, metadata.columns, context); } + else if (type == REMOVE_SAMPLE_BY) + { + metadata.sampling_key = {}; + } else if (type == COMMENT_COLUMN) { metadata.columns.modify(column_name, @@ -745,7 +756,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada return false; /// We remove properties on metadata level - if (isRemovingProperty() || type == REMOVE_TTL) + if (isRemovingProperty() || type == REMOVE_TTL || type == REMOVE_SAMPLE_BY) return false; if (type == DROP_COLUMN || type == DROP_INDEX || type == DROP_PROJECTION || type == RENAME_COLUMN) @@ -1208,6 +1219,10 @@ void AlterCommands::validate(const StorageInMemoryMetadata & metadata, ContextPt { throw Exception{"Table doesn't have any table TTL expression, cannot remove", ErrorCodes::BAD_ARGUMENTS}; } + else if (command.type == AlterCommand::REMOVE_SAMPLE_BY && !metadata.hasSamplingKey()) + { + throw Exception{"Table doesn't have SAMPLE BY, cannot remove", ErrorCodes::BAD_ARGUMENTS}; + } /// Collect default expressions for MODIFY and ADD comands if (command.type == AlterCommand::MODIFY_COLUMN || command.type == AlterCommand::ADD_COLUMN) diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 046238bd5f5..dce6b496741 100644 --- a/src/Storages/AlterCommands.h +++ 
b/src/Storages/AlterCommands.h @@ -45,7 +45,8 @@ struct AlterCommand RENAME_COLUMN, REMOVE_TTL, MODIFY_DATABASE_SETTING, - COMMENT_TABLE + COMMENT_TABLE, + REMOVE_SAMPLE_BY, }; /// Which property user wants to remove from column diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 167e36ebbe3..254d82520dc 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 1841be22b72..cf349d1f8cf 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -14,8 +14,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index 3c0b6333fc1..668cec22e8b 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 19385e526a7..6e81f5577ab 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -23,7 +22,10 @@ #include #include #include -#include +#include +#include +#include +#include #include @@ -124,12 +126,13 @@ public: auto compression = chooseCompressionMethod(path, compression_method); read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri, path, getContext()->getGlobalContext()->getConfigRef()), compression); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size); + pipeline = QueryPipeline(std::move(input_format)); - reader = std::make_shared(input_format); - reader->readPrefix(); + reader = std::make_unique(pipeline); } - if (auto res = reader->read()) + Block res; + if (reader->pull(res)) { Columns columns = res.getColumns(); UInt64 num_rows = res.rows(); @@ -153,15 +156,16 @@ public: return Chunk(std::move(columns), num_rows); } - reader->readSuffix(); reader.reset(); + pipeline.reset(); read_buf.reset(); } } private: std::unique_ptr read_buf; - BlockInputStreamPtr reader; + QueryPipeline pipeline; + std::unique_ptr reader; SourcesInfoPtr source_info; String uri; String format; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 8effb5ed9bf..7c158794caf 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ae430cb635e..74e17442fe8 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -2,12 +2,11 @@ #include #include -#include #include #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index ad48858b658..3e24608a180 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -1,9 +1,7 @@ #include -#include #include #include -#include #include #include #include diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 903ea81946d..39688060b0a 
100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -16,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 6cf7ce59fa2..f010ef50ecd 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index d387c5f5e53..ec748d4d43a 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,6 @@ limitations under the License. */ #include #include #include -#include #include #include #include diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index ccf8a2e606d..06143e954f8 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace ProfileEvents diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 9e09cd0036e..a1df4a13e6a 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 5e12d5da678..d0d3f283478 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index e334cd486ef..48fd9e583bf 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -35,8 +35,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart( [&](const ISerialization::SubstreamPath & substream_path) { ++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; - }, - {}); + }); } NameSet remove_files; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index aa3f91a4f00..5cb819c44a4 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -1,27 +1,27 @@ -#include "Storages/MergeTree/MergeTask.h" +#include #include #include #include -#include "Common/ActionBlocker.h" +#include -#include "Storages/MergeTree/MergeTreeData.h" -#include "Storages/MergeTree/IMergeTreeDataPart.h" -#include "Storages/MergeTree/MergeTreeSequentialSource.h" -#include "Storages/MergeTree/FutureMergedMutatedPart.h" -#include "Processors/Transforms/ExpressionTransform.h" -#include "Processors/Transforms/MaterializingTransform.h" -#include "Processors/Merges/MergingSortedTransform.h" -#include "Processors/Merges/CollapsingSortedTransform.h" -#include "Processors/Merges/SummingSortedTransform.h" -#include "Processors/Merges/ReplacingSortedTransform.h" -#include "Processors/Merges/GraphiteRollupSortedTransform.h" -#include "Processors/Merges/AggregatingSortedTransform.h" -#include "Processors/Merges/VersionedCollapsingTransform.h" -#include "Processors/Executors/PipelineExecutingBlockInputStream.h" -#include "DataStreams/TTLBlockInputStream.h" -#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace DB @@ -117,11 +117,23 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() } ctx->disk = global_ctx->space_reservation->getDisk(); - auto local_new_part_relative_tmp_path_name = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix; - auto local_new_part_tmp_path = global_ctx->data->relative_data_path + local_new_part_relative_tmp_path_name + "/"; + + String local_part_path = global_ctx->data->relative_data_path; + String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + (global_ctx->parent_part ? ".proj" : ""); + String local_new_part_tmp_path = local_part_path + local_tmp_part_basename + "/"; + if (ctx->disk->exists(local_new_part_tmp_path)) throw Exception("Directory " + fullPath(ctx->disk, local_new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + { + std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); + global_ctx->mutator->tmp_parts.emplace(local_tmp_part_basename); + } + SCOPE_EXIT( + std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); + global_ctx->mutator->tmp_parts.erase(local_tmp_part_basename); + ); + global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); @@ -142,7 +154,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->future_part->type, global_ctx->future_part->part_info, local_single_disk_volume, - local_new_part_relative_tmp_path_name, + local_tmp_part_basename, global_ctx->parent_part); global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; @@ -561,6 +573,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c global_ctx->new_data_part.get(), ".proj", global_ctx->data, + global_ctx->mutator, global_ctx->merges_blocker, global_ctx->ttl_merges_blocker)); } @@ -765,7 +778,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() { case MergeTreeData::MergingParams::Ordinary: merged_transform = std::make_shared( - header, pipes.size(), sort_description, merge_block_size, 0, false, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); + header, pipes.size(), sort_description, merge_block_size, 0, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: diff --git a/src/Storages/MergeTree/MergeTask.h b/src/Storages/MergeTree/MergeTask.h index 22dc70bd78c..80c8e7165f8 100644 --- a/src/Storages/MergeTree/MergeTask.h +++ b/src/Storages/MergeTree/MergeTask.h @@ -60,6 +60,7 @@ public: const IMergeTreeDataPart * parent_part_, String suffix_, MergeTreeData * data_, + MergeTreeDataMergerMutator * mutator_, ActionBlocker * merges_blocker_, ActionBlocker * ttl_merges_blocker_) { @@ -78,6 +79,7 @@ public: global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_); global_ctx->parent_part = std::move(parent_part_); global_ctx->data = std::move(data_); + global_ctx->mutator = std::move(mutator_); global_ctx->merges_blocker = std::move(merges_blocker_); global_ctx->ttl_merges_blocker = std::move(ttl_merges_blocker_); @@ -121,6 +123,7 @@ private: std::unique_ptr projection_merge_list_element; MergeListElement * merge_list_element_ptr{nullptr}; MergeTreeData * data{nullptr}; + MergeTreeDataMergerMutator * mutator{nullptr}; 
ActionBlocker * merges_blocker{nullptr}; ActionBlocker * ttl_merges_blocker{nullptr}; StorageMetadataPtr metadata_snapshot{nullptr}; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 93594dd4357..88f3052e833 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -260,8 +260,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & data_part, const Names & required_columns, - const PrewhereInfoPtr & prewhere_info, - bool check_columns) + const PrewhereInfoPtr & prewhere_info) { Names column_names = required_columns; Names pre_column_names; @@ -308,18 +307,9 @@ MergeTreeReadTaskColumns getReadTaskColumns( MergeTreeReadTaskColumns result; - if (check_columns) - { - const auto & columns = metadata_snapshot->getColumns(); - result.pre_columns = columns.getByNames(ColumnsDescription::All, pre_column_names, true); - result.columns = columns.getByNames(ColumnsDescription::All, column_names, true); - } - else - { - result.pre_columns = data_part->getColumns().addTypes(pre_column_names); - result.columns = data_part->getColumns().addTypes(column_names); - } - + auto columns = metadata_snapshot->getColumns(); + result.pre_columns = columns.getByNames(ColumnsDescription::All, pre_column_names, true); + result.columns = columns.getByNames(ColumnsDescription::All, column_names, true); result.should_reorder = should_reorder; return result; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 4c4081bd83b..2dfe6fcf06d 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -78,8 +78,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & data_part, const Names & required_columns, - const PrewhereInfoPtr & prewhere_info, - bool check_columns); + const PrewhereInfoPtr & prewhere_info); struct MergeTreeBlockSizePredictor { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 790b95a9fa9..57729ef2eda 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -56,6 +54,7 @@ #include #include #include +#include #include #include @@ -206,6 +205,8 @@ MergeTreeData::MergeTreeData( , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext()) , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext()) { + context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded(); + const auto settings = getSettings(); allow_nullable_key = attach || settings->allow_nullable_key; @@ -249,7 +250,7 @@ MergeTreeData::MergeTreeData( { /// This is for backward compatibility. 
checkSampleExpression(metadata_, attach || settings->compatibility_allow_sampling_expression_not_in_primary_key, - settings->check_sample_column_is_correct); + settings->check_sample_column_is_correct && !attach); } checkTTLExpressions(metadata_, metadata_); @@ -896,6 +897,261 @@ Int64 MergeTreeData::getMaxBlockNumber() const return max_block_num; } +void MergeTreeData::loadDataPartsFromDisk( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + ThreadPool & pool, + size_t num_parts, + std::queue>> & parts_queue, + bool skip_sanity_checks, + const MergeTreeSettingsPtr & settings) +{ + /// Parallel loading of data parts. + pool.setMaxThreads(std::min(size_t(settings->max_part_loading_threads), num_parts)); + size_t num_threads = pool.getMaxThreads(); + std::vector parts_per_thread(num_threads, num_parts / num_threads); + for (size_t i = 0ul; i < num_parts % num_threads; ++i) + ++parts_per_thread[i]; + + /// Prepare data parts for parallel loading. Threads will focus on given disk first, then steal + /// others' tasks when finish current disk part loading process. + std::vector>> threads_parts(num_threads); + std::set remaining_thread_parts; + std::queue threads_queue; + for (size_t i = 0; i < num_threads; ++i) + { + remaining_thread_parts.insert(i); + threads_queue.push(i); + } + + while (!parts_queue.empty()) + { + assert(!threads_queue.empty()); + size_t i = threads_queue.front(); + auto & need_parts = parts_per_thread[i]; + assert(need_parts > 0); + auto & thread_parts = threads_parts[i]; + auto & current_parts = parts_queue.front(); + assert(!current_parts.empty()); + auto parts_to_grab = std::min(need_parts, current_parts.size()); + + thread_parts.insert(thread_parts.end(), current_parts.end() - parts_to_grab, current_parts.end()); + current_parts.resize(current_parts.size() - parts_to_grab); + need_parts -= parts_to_grab; + + /// Before processing next thread, change disk if possible. + /// Different threads will likely start loading parts from different disk, + /// which may improve read parallelism for JBOD. + + /// If current disk still has some parts, push it to the tail. + if (!current_parts.empty()) + parts_queue.push(std::move(current_parts)); + parts_queue.pop(); + + /// If current thread still want some parts, push it to the tail. 
+ if (need_parts > 0) + threads_queue.push(i); + threads_queue.pop(); + } + assert(threads_queue.empty()); + assert(std::all_of(threads_parts.begin(), threads_parts.end(), [](const std::vector> & parts) + { + return !parts.empty(); + })); + + size_t suspicious_broken_parts = 0; + size_t suspicious_broken_parts_bytes = 0; + std::atomic has_adaptive_parts = false; + std::atomic has_non_adaptive_parts = false; + + std::mutex mutex; + auto load_part = [&](const String & part_name, const DiskPtr & part_disk_ptr) + { + auto part_opt = MergeTreePartInfo::tryParsePartName(part_name, format_version); + if (!part_opt) + return; + const auto & part_info = *part_opt; + auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); + auto part = createPart(part_name, part_info, single_disk_volume, part_name); + bool broken = false; + + String part_path = fs::path(relative_data_path) / part_name; + String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; + if (part_disk_ptr->exists(marker_path)) + { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); + LOG_WARNING(log, + "Detaching stale part {}{} (size: {}), which should have been deleted after a move. " + "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); + std::lock_guard loading_lock(mutex); + broken_parts_to_detach.push_back(part); + ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; + return; + } + + try + { + part->loadColumnsChecksumsIndexes(require_part_metadata, true); + } + catch (const Exception & e) + { + /// Don't count the part as broken if there is not enough memory to load it. + /// In fact, there can be many similar situations. + /// But it is OK, because there is a safety guard against deleting too many parts. + if (isNotEnoughMemoryErrorCode(e.code())) + throw; + + broken = true; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + catch (...) + { + broken = true; + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + /// Ignore broken parts that can appear as a result of hard server restart. + if (broken) + { + /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist + size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); + + LOG_ERROR(log, + "Detaching broken part {}{} (size: {}). " + "If it happened after update, it is likely because of backward incompability. 
" + "You need to resolve this manually", + getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); + std::lock_guard loading_lock(mutex); + broken_parts_to_detach.push_back(part); + ++suspicious_broken_parts; + suspicious_broken_parts_bytes += size_of_part; + return; + } + if (!part->index_granularity_info.is_adaptive) + has_non_adaptive_parts.store(true, std::memory_order_relaxed); + else + has_adaptive_parts.store(true, std::memory_order_relaxed); + + part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->setState(DataPartState::Committed); + + std::lock_guard loading_lock(mutex); + auto [it, inserted] = data_parts_indexes.insert(part); + /// Remove duplicate parts with the same checksum. + if (!inserted) + { + if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) + { + LOG_ERROR(log, "Remove duplicate part {}", part->getFullPath()); + duplicate_parts_to_remove.push_back(part); + } + else + throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + } + + addPartContributionToDataVolume(part); + }; + + std::mutex part_select_mutex; + try + { + for (size_t thread = 0; thread < num_threads; ++thread) + { + pool.scheduleOrThrowOnError([&, thread] + { + while (true) + { + std::pair thread_part; + { + const std::lock_guard lock{part_select_mutex}; + + if (remaining_thread_parts.empty()) + return; + + /// Steal task if nothing to do + auto thread_idx = thread; + if (threads_parts[thread].empty()) + { + // Try random steal tasks from the next thread + std::uniform_int_distribution distribution(0, remaining_thread_parts.size() - 1); + auto it = remaining_thread_parts.begin(); + std::advance(it, distribution(thread_local_rng)); + thread_idx = *it; + } + auto & thread_parts = threads_parts[thread_idx]; + thread_part = thread_parts.back(); + thread_parts.pop_back(); + if (thread_parts.empty()) + remaining_thread_parts.erase(thread_idx); + } + load_part(thread_part.first, thread_part.second); + } + }); + } + } + catch (...) + { + /// If this is not done, then in case of an exception, tasks will be destroyed before the threads are completed, and it will be bad. 
+ pool.wait(); + throw; + } + + pool.wait(); + + if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) + throw Exception( + "Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", + ErrorCodes::LOGICAL_ERROR); + + has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; + + if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously many ({}) broken parts to remove.", + suspicious_broken_parts); + + if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) + throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, + "Suspiciously big size ({}) of all broken parts to remove.", + formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes)); +} + + +void MergeTreeData::loadDataPartsFromWAL( + DataPartsVector & /* broken_parts_to_detach */, + DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & parts_from_wal, + DataPartsLock & part_lock) +{ + for (auto & part : parts_from_wal) + { + if (getActiveContainingPart(part->info, DataPartState::Committed, part_lock)) + continue; + + part->modification_time = time(nullptr); + /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later + part->setState(DataPartState::Committed); + + auto [it, inserted] = data_parts_indexes.insert(part); + if (!inserted) + { + if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) + { + LOG_ERROR(log, "Remove duplicate part {}", part->getFullPath()); + duplicate_parts_to_remove.push_back(part); + } + else + throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + } + + addPartContributionToDataVolume(part); + } +} + void MergeTreeData::loadDataParts(bool skip_sanity_checks) { @@ -903,7 +1159,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) auto metadata_snapshot = getInMemoryMetadataPtr(); const auto settings = getSettings(); - std::vector> part_names_with_disks; MutableDataPartsVector parts_from_wal; Strings part_file_names; @@ -933,193 +1188,90 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - /// Reversed order to load part from low priority disks firstly. - /// Used for keep part on low priority disk if duplication found - for (auto disk_it = disks.rbegin(); disk_it != disks.rend(); ++disk_it) + /// Collect part names by disk. + std::map>> disk_part_map; + std::map disk_wal_part_map; + ThreadPool pool(disks.size()); + std::mutex wal_init_lock; + for (const auto & disk_ptr : disks) { - auto disk_ptr = *disk_it; + auto & disk_parts = disk_part_map[disk_ptr->getName()]; + auto & disk_wal_parts = disk_wal_part_map[disk_ptr->getName()]; - for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) + pool.scheduleOrThrowOnError([&, disk_ptr]() { - /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. 
- if (startsWith(it->name(), "tmp") - || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME) - continue; - - if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) - part_names_with_disks.emplace_back(it->name(), disk_ptr); - else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal) + for (auto it = disk_ptr->iterateDirectory(relative_data_path); it->isValid(); it->next()) { - /// Create and correctly initialize global WAL object - write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); - for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext())) - parts_from_wal.push_back(std::move(part)); + /// Skip temporary directories, file 'format_version.txt' and directory 'detached'. + if (startsWith(it->name(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME + || it->name() == MergeTreeData::DETACHED_DIR_NAME) + continue; + + if (!startsWith(it->name(), MergeTreeWriteAheadLog::WAL_FILE_NAME)) + disk_parts.emplace_back(std::make_pair(it->name(), disk_ptr)); + else if (it->name() == MergeTreeWriteAheadLog::DEFAULT_WAL_FILE_NAME && settings->in_memory_parts_enable_wal) + { + std::unique_lock lock(wal_init_lock); + if (write_ahead_log != nullptr) + throw Exception( + "There are multiple WAL files appeared in current storage policy. You need to resolve this manually", + ErrorCodes::CORRUPTED_DATA); + + write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); + for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext())) + disk_wal_parts.push_back(std::move(part)); + } + else if (settings->in_memory_parts_enable_wal) + { + MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); + for (auto && part : wal.restore(metadata_snapshot, getContext())) + disk_wal_parts.push_back(std::move(part)); + } } - else if (settings->in_memory_parts_enable_wal) - { - MergeTreeWriteAheadLog wal(*this, disk_ptr, it->name()); - for (auto && part : wal.restore(metadata_snapshot, getContext())) - parts_from_wal.push_back(std::move(part)); - } - } - } - - auto part_lock = lockParts(); - data_parts_indexes.clear(); - - if (part_names_with_disks.empty() && parts_from_wal.empty()) - { - LOG_DEBUG(log, "There are no data parts"); - return; - } - - /// Parallel loading of data parts. 
- size_t num_threads = std::min(size_t(settings->max_part_loading_threads), part_names_with_disks.size()); - - std::mutex mutex; - - DataPartsVector broken_parts_to_detach; - size_t suspicious_broken_parts = 0; - size_t suspicious_broken_parts_bytes = 0; - - std::atomic has_adaptive_parts = false; - std::atomic has_non_adaptive_parts = false; - - ThreadPool pool(num_threads); - - for (auto & part_names_with_disk : part_names_with_disks) - { - pool.scheduleOrThrowOnError([&] - { - const auto & [part_name, part_disk_ptr] = part_names_with_disk; - - auto part_opt = MergeTreePartInfo::tryParsePartName(part_name, format_version); - - if (!part_opt) - return; - - auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); - auto part = createPart(part_name, *part_opt, single_disk_volume, part_name); - bool broken = false; - - String part_path = fs::path(relative_data_path) / part_name; - String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; - - if (part_disk_ptr->exists(marker_path)) - { - /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist - size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); - LOG_WARNING(log, - "Detaching stale part {}{} (size: {}), which should have been deleted after a move. " - "That can only happen after unclean restart of ClickHouse after move of a part having an operation blocking that stale copy of part.", - getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); - std::lock_guard loading_lock(mutex); - - broken_parts_to_detach.push_back(part); - - ++suspicious_broken_parts; - suspicious_broken_parts_bytes += size_of_part; - - return; - } - - try - { - part->loadColumnsChecksumsIndexes(require_part_metadata, true); - } - catch (const Exception & e) - { - /// Don't count the part as broken if there is not enough memory to load it. - /// In fact, there can be many similar situations. - /// But it is OK, because there is a safety guard against deleting too many parts. - if (isNotEnoughMemoryErrorCode(e.code())) - throw; - - broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - catch (...) - { - broken = true; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - /// Ignore broken parts that can appear as a result of hard server restart. - if (broken) - { - /// NOTE: getBytesOnDisk() cannot be used here, since it maybe zero of checksums.txt will not exist - size_t size_of_part = IMergeTreeDataPart::calculateTotalSizeOnDisk(part->volume->getDisk(), part->getFullRelativePath()); - - LOG_ERROR(log, - "Detaching broken part {}{} (size: {}). " - "If it happened after update, it is likely because of backward incompability. 
" - "You need to resolve this manually", - getFullPathOnDisk(part_disk_ptr), part_name, formatReadableSizeWithBinarySuffix(size_of_part)); - std::lock_guard loading_lock(mutex); - - broken_parts_to_detach.push_back(part); - - ++suspicious_broken_parts; - suspicious_broken_parts_bytes += size_of_part; - - return; - } - - if (!part->index_granularity_info.is_adaptive) - has_non_adaptive_parts.store(true, std::memory_order_relaxed); - else - has_adaptive_parts.store(true, std::memory_order_relaxed); - - part->modification_time = part_disk_ptr->getLastModified(fs::path(relative_data_path) / part_name).epochTime(); - - /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later - part->setState(DataPartState::Committed); - - std::lock_guard loading_lock(mutex); - - if (!data_parts_indexes.insert(part).second) - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists", part->name); - - addPartContributionToDataVolume(part); }); } pool.wait(); - for (auto & part : parts_from_wal) + for (auto & [_, disk_wal_parts] : disk_wal_part_map) + parts_from_wal.insert( + parts_from_wal.end(), std::make_move_iterator(disk_wal_parts.begin()), std::make_move_iterator(disk_wal_parts.end())); + + size_t num_parts = 0; + std::queue>> parts_queue; + for (auto & [_, disk_parts] : disk_part_map) { - if (getActiveContainingPart(part->info, DataPartState::Committed, part_lock)) + if (disk_parts.empty()) continue; - - part->modification_time = time(nullptr); - /// Assume that all parts are Committed, covered parts will be detected and marked as Outdated later - part->setState(DataPartState::Committed); - - if (!data_parts_indexes.insert(part).second) - throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); - - addPartContributionToDataVolume(part); + num_parts += disk_parts.size(); + parts_queue.push(std::move(disk_parts)); } - if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) - throw Exception("Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", ErrorCodes::LOGICAL_ERROR); + auto part_lock = lockParts(); + data_parts_indexes.clear(); - has_non_adaptive_index_granularity_parts = has_non_adaptive_parts; + if (num_parts == 0 && parts_from_wal.empty()) + { + LOG_DEBUG(log, "There are no data parts"); + return; + } - if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) - throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously many ({}) broken parts to remove.", - suspicious_broken_parts); - if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) - throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously big size ({}) of all broken parts to remove.", - formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes)); + DataPartsVector broken_parts_to_detach; + DataPartsVector duplicate_parts_to_remove; + + if (num_parts > 0) + loadDataPartsFromDisk( + broken_parts_to_detach, duplicate_parts_to_remove, pool, num_parts, parts_queue, skip_sanity_checks, settings); + + if (!parts_from_wal.empty()) + loadDataPartsFromWAL(broken_parts_to_detach, duplicate_parts_to_remove, parts_from_wal, part_lock); for (auto & part : broken_parts_to_detach) part->renameToDetached("broken-on-start"); /// detached parts must not have '_' in prefixes + for (auto & part : duplicate_parts_to_remove) + part->remove(); /// 
Delete from the set of current parts those parts that are covered by another part (those parts that /// were merged), but that for some reason are still not deleted from the filesystem. @@ -1193,7 +1345,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds) +void MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds) { /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -1209,35 +1361,44 @@ void MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifet { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) { - if (startsWith(it->name(), "tmp_")) + const std::string & basename = it->name(); + if (!startsWith(basename, "tmp_")) { - try + continue; + } + const std::string & full_path = fullPath(disk, it->path()); + if (merger_mutator.hasTemporaryPart(basename)) + { + LOG_WARNING(log, "{} is an active destination for one of merge/mutation (consider increasing temporary_directories_lifetime setting)", full_path); + continue; + } + + try + { + if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) { - if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) - { - LOG_WARNING(log, "Removing temporary directory {}", fullPath(disk, it->path())); - disk->removeRecursive(it->path()); - } + LOG_WARNING(log, "Removing temporary directory {}", full_path); + disk->removeRecursive(it->path()); } - /// see getModificationTime() - catch (const ErrnoException & e) + } + /// see getModificationTime() + catch (const ErrnoException & e) + { + if (e.getErrno() == ENOENT) { - if (e.getErrno() == ENOENT) - { - /// If the file is already deleted, do nothing. - } - else - throw; + /// If the file is already deleted, do nothing. } - catch (const fs::filesystem_error & e) + else + throw; + } + catch (const fs::filesystem_error & e) + { + if (e.code() == std::errc::no_such_file_or_directory) { - if (e.code() == std::errc::no_such_file_or_directory) - { - /// If the file is already deleted, do nothing. - } - else - throw; + /// If the file is already deleted, do nothing. } + else + throw; } } } @@ -3513,9 +3674,12 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc buf, metadata_snapshot->getPartitionKey().sample_block, local_context->getSettingsRef().max_block_size); - auto input_stream = std::make_shared(input_format); + QueryPipeline pipeline(std::move(input_format)); + PullingPipelineExecutor executor(pipeline); + + Block block; + executor.pull(block); - auto block = input_stream->read(); if (!block || !block.rows()) throw Exception( "Could not parse partition value: `" + partition_ast.fields_str + "`", @@ -4245,6 +4409,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, const SelectQueryInfo & query_info, + const DataPartsVector & parts, + DataPartsVector & normal_parts, ContextPtr query_context) const { if (!metadata_snapshot->minmax_count_projection) @@ -4252,9 +4418,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( "Cannot find the definition of minmax_count projection but it's used in current query. 
It's a bug", ErrorCodes::LOGICAL_ERROR); - auto block = metadata_snapshot->minmax_count_projection->sample_block; - auto minmax_count_columns = block.mutateColumns(); + auto block = metadata_snapshot->minmax_count_projection->sample_block.cloneEmpty(); + bool need_primary_key_max_column = false; + const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name; + if (!primary_key_max_column_name.empty()) + { + need_primary_key_max_column = std::any_of( + required_columns.begin(), required_columns.end(), [&](const auto & name) { return primary_key_max_column_name == name; }); + } + auto minmax_count_columns = block.mutateColumns(); auto insert = [](ColumnAggregateFunction & column, const Field & value) { auto func = column.getAggregateFunction(); @@ -4269,7 +4442,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( column.insertFrom(place); }; - auto parts = getDataPartsVector(); ASTPtr expression_ast; Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */); if (virtual_columns_block.rows() == 0) @@ -4293,26 +4465,33 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( if (!part->minmax_idx->initialized) throw Exception("Found a non-empty part with uninitialized minmax_idx. It's a bug", ErrorCodes::LOGICAL_ERROR); - size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); - if (2 * minmax_idx_size + 1 != minmax_count_columns.size()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "minmax_count projection should have twice plus one the number of ranges in minmax_idx. 2 * minmax_idx_size + 1 = {}, " - "minmax_count_columns.size() = {}. It's a bug", - 2 * minmax_idx_size + 1, - minmax_count_columns.size()); + if (need_primary_key_max_column && !part->index_granularity.hasFinalMark()) + { + normal_parts.push_back(part); + continue; + } + size_t pos = 0; + size_t minmax_idx_size = part->minmax_idx->hyperrectangle.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { - size_t min_pos = i * 2; - size_t max_pos = i * 2 + 1; - auto & min_column = assert_cast(*minmax_count_columns[min_pos]); - auto & max_column = assert_cast(*minmax_count_columns[max_pos]); + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); const auto & range = part->minmax_idx->hyperrectangle[i]; insert(min_column, range.left); insert(max_column, range.right); } + if (!primary_key_max_column_name.empty()) + { + const auto & primary_key_column = *part->index[0]; + auto primary_key_column_size = primary_key_column.size(); + auto & min_column = assert_cast(*minmax_count_columns[pos++]); + auto & max_column = assert_cast(*minmax_count_columns[pos++]); + insert(min_column, primary_key_column[0]); + insert(max_column, primary_key_column[primary_key_column_size - 1]); + } + { auto & column = assert_cast(*minmax_count_columns.back()); auto func = column.getAggregateFunction(); @@ -4352,7 +4531,7 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return false; - const auto & query_ptr = query_info.query; + const auto & query_ptr = query_info.original_query; if (auto * select = query_ptr->as(); select) { @@ -4569,33 +4748,74 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( size_t min_sum_marks = std::numeric_limits::max(); if (metadata_snapshot->minmax_count_projection) 
add_projection_candidate(*metadata_snapshot->minmax_count_projection); + std::optional minmax_conut_projection_candidate; + if (!candidates.empty()) + { + minmax_conut_projection_candidate.emplace(std::move(candidates.front())); + candidates.clear(); + } + MergeTreeDataSelectExecutor reader(*this); + std::shared_ptr max_added_blocks; + if (settings.select_sequential_consistency) + { + if (const StorageReplicatedMergeTree * replicated = dynamic_cast(this)) + max_added_blocks = std::make_shared(replicated->getMaxAddedBlocks()); + } + auto parts = getDataPartsVector(); - // Only add more projection candidates if minmax_count_projection cannot match. - if (candidates.empty()) + // If minmax_count_projection is a valid candidate, check its completeness. + if (minmax_conut_projection_candidate) + { + DataPartsVector normal_parts; + query_info.minmax_count_projection_block = getMinMaxCountProjectionBlock( + metadata_snapshot, minmax_conut_projection_candidate->required_columns, query_info, parts, normal_parts, query_context); + + if (normal_parts.empty()) + { + selected_candidate = &*minmax_conut_projection_candidate; + selected_candidate->complete = true; + min_sum_marks = query_info.minmax_count_projection_block.rows(); + } + else + { + if (normal_parts.size() == parts.size()) + { + // minmax_count_projection is useless. + } + else + { + auto normal_result_ptr = reader.estimateNumMarksToRead( + normal_parts, + analysis_result.required_columns, + metadata_snapshot, + metadata_snapshot, + query_info, + query_context, + settings.max_threads, + max_added_blocks); + + if (!normal_result_ptr->error()) + { + selected_candidate = &*minmax_conut_projection_candidate; + selected_candidate->merge_tree_normal_select_result_ptr = normal_result_ptr; + min_sum_marks = query_info.minmax_count_projection_block.rows() + normal_result_ptr->marks(); + } + } + + // We cannot find a complete match of minmax_count_projection, add more projections to check. + for (const auto & projection : metadata_snapshot->projections) + add_projection_candidate(projection); + } + } + else { for (const auto & projection : metadata_snapshot->projections) add_projection_candidate(projection); } - else - { - selected_candidate = &candidates.front(); - query_info.minmax_count_projection_block - = getMinMaxCountProjectionBlock(metadata_snapshot, selected_candidate->required_columns, query_info, query_context); - min_sum_marks = query_info.minmax_count_projection_block.rows(); - } // Let's select the best projection to execute the query. - if (!candidates.empty() && !selected_candidate) + if (!candidates.empty()) { - std::shared_ptr max_added_blocks; - if (settings.select_sequential_consistency) - { - if (const StorageReplicatedMergeTree * replicated = dynamic_cast(this)) - max_added_blocks = std::make_shared(replicated->getMaxAddedBlocks()); - } - - auto parts = getDataPartsVector(); - MergeTreeDataSelectExecutor reader(*this); query_info.merge_tree_select_result_ptr = reader.estimateNumMarksToRead( parts, analysis_result.required_columns, @@ -4610,7 +4830,12 @@ bool MergeTreeData::getQueryProcessingStageWithAggregateProjection( { // Add 1 to base sum_marks so that we prefer projections even when they have equal number of marks to read. // NOTE: It is not clear if we need it. E.g. projections do not support skip index for now. 
- min_sum_marks = query_info.merge_tree_select_result_ptr->marks() + 1; + auto sum_marks = query_info.merge_tree_select_result_ptr->marks() + 1; + if (sum_marks < min_sum_marks) + { + selected_candidate = nullptr; + min_sum_marks = sum_marks; + } } /// Favor aggregate projections diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index e7f1db8f3ec..2ea6a89002c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -39,6 +39,7 @@ namespace DB class AlterCommands; class MergeTreePartsMover; +class MergeTreeDataMergerMutator; class MutationCommands; class Context; struct JobAndPool; @@ -364,10 +365,22 @@ public: bool attach, BrokenPartCallback broken_part_callback_ = [](const String &){}); + /// Build a block of minmax and count values of a MergeTree table. These values are extracted + /// from minmax_indices, the first expression of primary key, and part rows. + /// + /// query_info - used to filter unneeded parts + /// + /// parts - part set to filter + /// + /// normal_parts - collects parts that don't have all the needed values to form the block. + /// Specifically, this is when a part doesn't contain a final mark and the related max value is + /// required. Block getMinMaxCountProjectionBlock( const StorageMetadataPtr & metadata_snapshot, const Names & required_columns, const SelectQueryInfo & query_info, + const DataPartsVector & parts, + DataPartsVector & normal_parts, ContextPtr query_context) const; bool getQueryProcessingStageWithAggregateProjection( @@ -393,6 +406,7 @@ public: || merging_params.mode == MergingParams::Summing || merging_params.mode == MergingParams::Aggregating || merging_params.mode == MergingParams::Replacing + || merging_params.mode == MergingParams::Graphite || merging_params.mode == MergingParams::VersionedCollapsing; } @@ -536,7 +550,7 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. - void clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds); + void clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds); void clearEmptyParts(); @@ -1143,6 +1157,21 @@ private: /// Returns default settings for storage with possible changes from global config. virtual std::unique_ptr getDefaultSettings() const = 0; + + void loadDataPartsFromDisk( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + ThreadPool & pool, + size_t num_parts, + std::queue>> & parts_queue, + bool skip_sanity_checks, + const MergeTreeSettingsPtr & settings); + + void loadDataPartsFromWAL( + DataPartsVector & broken_parts_to_detach, + DataPartsVector & duplicate_parts_to_remove, + MutableDataPartsVector & parts_from_wal, + DataPartsLock & part_lock); }; /// RAII struct to record big parts that are submerging or emerging. 
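The new MergeTreeData::loadDataPartsFromDisk() above deals the part names found on each disk out into per-thread buckets and then lets an idle thread steal work from a random non-empty bucket, so a slow disk does not leave the other loader threads idle. A condensed standalone sketch of that scheduling idea (made-up part names, plain std::thread instead of ClickHouse's ThreadPool, no real ClickHouse types):

    #include <iostream>
    #include <iterator>
    #include <mutex>
    #include <random>
    #include <set>
    #include <string>
    #include <thread>
    #include <vector>

    int main()
    {
        /// Pretend each inner vector is the list of part directories found on one disk.
        std::vector<std::vector<std::string>> disks = {
            {"all_1_1_0", "all_2_2_0", "all_3_3_0"},
            {"all_4_4_0"},
            {"all_5_5_0", "all_6_6_0"},
        };

        const size_t num_threads = 2;

        /// Step 1: deal the parts out round-robin into one bucket per thread, so different
        /// threads start on different disks (better read parallelism on JBOD setups).
        std::vector<std::vector<std::string>> buckets(num_threads);
        size_t next_thread = 0;
        for (const auto & disk : disks)
            for (const auto & part : disk)
            {
                buckets[next_thread].push_back(part);
                next_thread = (next_thread + 1) % num_threads;
            }

        std::mutex mutex;
        std::set<size_t> remaining;                 /// indices of buckets that still hold parts
        for (size_t i = 0; i < buckets.size(); ++i)
            if (!buckets[i].empty())
                remaining.insert(i);

        /// Step 2: each thread drains its own bucket; once it is empty the thread steals from
        /// a random non-empty bucket, so nobody sits idle while another disk still has parts.
        auto worker = [&](size_t me)
        {
            std::mt19937 rng(me);
            while (true)
            {
                std::string part;
                {
                    std::lock_guard lock(mutex);
                    if (remaining.empty())
                        return;
                    size_t victim = me;
                    if (!remaining.count(me))       /// my own bucket is exhausted -> steal
                    {
                        std::uniform_int_distribution<size_t> dist(0, remaining.size() - 1);
                        auto it = remaining.begin();
                        std::advance(it, dist(rng));
                        victim = *it;
                    }
                    part = std::move(buckets[victim].back());
                    buckets[victim].pop_back();
                    if (buckets[victim].empty())
                        remaining.erase(victim);
                }

                /// "Load" the part; the real code calls part->loadColumnsChecksumsIndexes() here.
                std::lock_guard lock(mutex);
                std::cout << "thread " << me << " loaded " << part << '\n';
            }
        };

        std::vector<std::thread> threads;
        for (size_t i = 0; i < num_threads; ++i)
            threads.emplace_back(worker, i);
        for (auto & t : threads)
            t.join();
    }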
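The same group of changes passes the merger/mutator into clearOldTemporaryDirectories() so that cleanup skips any tmp_* directory an in-flight merge or mutation is still writing: MergeTask registers the temporary basename under a lock before writing and erases it again on scope exit, and the cleanup thread asks hasTemporaryPart() before removing a directory. A minimal standalone sketch of that guard pattern; the class and names (TemporaryPartRegistry, Guard) are illustrative only, not ClickHouse API:

    #include <iostream>
    #include <mutex>
    #include <string>
    #include <unordered_set>

    class TemporaryPartRegistry
    {
    public:
        /// RAII guard: registers a tmp_* basename on construction, unregisters on destruction
        /// (the patch achieves the same with SCOPE_EXIT inside MergeTask::prepare()).
        struct Guard
        {
            TemporaryPartRegistry & registry;
            std::string basename;

            Guard(TemporaryPartRegistry & registry_, std::string basename_)
                : registry(registry_), basename(std::move(basename_))
            {
                std::lock_guard lock(registry.mutex);
                registry.active.insert(basename);
            }

            ~Guard()
            {
                std::lock_guard lock(registry.mutex);
                registry.active.erase(basename);
            }
        };

        /// Called by the cleanup thread before it removes an old "tmp_*" directory.
        bool hasTemporaryPart(const std::string & basename) const
        {
            std::lock_guard lock(mutex);
            return active.count(basename) > 0;
        }

    private:
        mutable std::mutex mutex;                  /// mutable: the const check above still needs to lock
        std::unordered_set<std::string> active;
    };

    int main()
    {
        TemporaryPartRegistry registry;
        {
            TemporaryPartRegistry::Guard guard(registry, "tmp_merge_all_1_2_3");
            /// While the "merge" is in flight, cleanup must leave the directory alone.
            std::cout << registry.hasTemporaryPart("tmp_merge_all_1_2_3") << '\n';   // prints 1
        }
        /// Guard destroyed: the directory may now be removed once it is old enough.
        std::cout << registry.hasTemporaryPart("tmp_merge_all_1_2_3") << '\n';       // prints 0
    }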
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 5d97c64b49b..c1637ab538b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -444,6 +443,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( parent_part, suffix, &data, + this, &merges_blocker, &ttl_merges_blocker); } @@ -774,4 +774,10 @@ ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadat } +bool MergeTreeDataMergerMutator::hasTemporaryPart(const std::string & basename) const +{ + std::lock_guard lock(tmp_parts_lock); + return tmp_parts.contains(basename); +} + } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 22650ac4eca..e5c8a4d8285 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -136,6 +137,7 @@ private: MergeTreeData::DataPartsVector selectAllPartsFromPartition(const String & partition_id); friend class MutateTask; + friend class MergeTask; /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. @@ -190,6 +192,26 @@ private: ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition; /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale + +public: + /// Returns true if passed part name is active. + /// (is the destination for one of active mutation/merge). + /// + /// NOTE: that it accept basename (i.e. dirname), not the path, + /// since later requires canonical form. + bool hasTemporaryPart(const std::string & basename) const; + +private: + /// Set of active temporary paths that is used as the destination. + /// List of such paths is required to avoid trying to remove them during cleanup. + /// + /// NOTE: It is pretty short, so use STL is fine. + std::unordered_set tmp_parts; + /// Lock for "tmp_parts". 
+ /// + /// NOTE: mutable is required to mark hasTemporaryPath() const + mutable std::mutex tmp_parts_lock; + }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 11e080fda6c..2f25cf7d12a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -98,7 +98,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( auto mrk_checksum = checksums.files.find(file_name + index_granularity_info.marks_file_extension); if (mrk_checksum != checksums.files.end()) size.marks += mrk_checksum->second.file_size; - }, {}); + }); return size; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 23a7b205a1b..5d17d6235e1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -39,19 +39,21 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc) { - IDataType::StreamCallbackWithType callback = [&] (const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = [&](const auto & substream_path) { + assert(!substream_path.empty()); String stream_name = ISerialization::getFileNameForStream(column, substream_path); /// Shared offsets for Nested type. if (compressed_streams.count(stream_name)) return; + const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; /// If we can use special codec than just get it if (ISerialization::isSpecialCompressionAllowed(substream_path)) - compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, subtype.get(), default_codec); else /// otherwise return only generic codecs and don't use info about data_type compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); @@ -63,7 +65,8 @@ void MergeTreeDataPartWriterCompact::addStreams(const NameAndTypePair & column, compressed_streams.emplace(stream_name, stream); }; - column.type->enumerateStreams(serializations[column.name], callback); + ISerialization::SubstreamPath path; + serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); } namespace diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index d952950e461..e64ba9edec0 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 8fccfbb1f90..224a197c3c8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -90,17 +90,20 @@ void MergeTreeDataPartWriterWide::addStreams( const NameAndTypePair & column, const ASTPtr & effective_codec_desc) { - IDataType::StreamCallbackWithType callback = [&] (const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type) + ISerialization::StreamCallback callback = 
[&](const auto & substream_path) { + assert(!substream_path.empty()); String stream_name = ISerialization::getFileNameForStream(column, substream_path); /// Shared offsets for Nested type. if (column_streams.count(stream_name)) return; + const auto & subtype = substream_path.back().data.type; CompressionCodecPtr compression_codec; + /// If we can use special codec then just get it if (ISerialization::isSpecialCompressionAllowed(substream_path)) - compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, &substream_type, default_codec); + compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, subtype.get(), default_codec); else /// otherwise return only generic codecs and don't use info about the` data_type compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); @@ -113,7 +116,8 @@ void MergeTreeDataPartWriterWide::addStreams( settings.max_compress_block_size); }; - column.type->enumerateStreams(serializations[column.name], callback); + ISerialization::SubstreamPath path; + serializations[column.name]->enumerateStreams(path, callback, column.type, nullptr); } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a33296cbf24..55de92735c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -162,7 +162,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( LOG_DEBUG( log, - "Choose {} projection {}", + "Choose {} {} projection {}", + query_info.projection->complete ? "complete" : "incomplete", query_info.projection->desc->type, query_info.projection->desc->name); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 752f85a1290..2cf24215d28 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -12,9 +12,7 @@ #include #include #include -#include -#include -#include +#include #include diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index d08cec24184..df3496c8876 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -25,7 +25,6 @@ MergeTreeReadPool::MergeTreeReadPool( const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, - const bool check_columns_, const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, @@ -41,7 +40,7 @@ MergeTreeReadPool::MergeTreeReadPool( , parts_ranges{std::move(parts_)} { /// parts don't contain duplicate MergeTreeDataPart's. - const auto per_part_sum_marks = fillPerPartInfo(parts_ranges, check_columns_); + const auto per_part_sum_marks = fillPerPartInfo(parts_ranges); fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges, min_marks_for_concurrent_read_); } @@ -142,30 +141,6 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, prewhere_info && prewhere_info->remove_prewhere_column, per_part_should_reorder[part_idx], std::move(curr_task_size_predictor)); } -MarkRanges MergeTreeReadPool::getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const -{ - MarkRanges all_part_ranges; - - /// Inefficient in presence of large number of data parts. 
- for (const auto & part_ranges : parts_ranges) - { - if (part_ranges.data_part.get() == &part) - { - all_part_ranges = part_ranges.ranges; - break; - } - } - if (all_part_ranges.empty()) - throw Exception("Trying to read marks range [" + std::to_string(from.begin) + ", " + std::to_string(from.end) + "] from part '" - + part.getFullPath() + "' which has no ranges in this query", ErrorCodes::LOGICAL_ERROR); - - auto begin = std::lower_bound(all_part_ranges.begin(), all_part_ranges.end(), from, [] (const auto & f, const auto & s) { return f.begin < s.begin; }); - if (begin == all_part_ranges.end()) - begin = std::prev(all_part_ranges.end()); - begin->begin = from.begin; - return MarkRanges(begin, all_part_ranges.end()); -} - Block MergeTreeReadPool::getHeader() const { return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID()); @@ -211,8 +186,7 @@ void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInf } -std::vector MergeTreeReadPool::fillPerPartInfo( - const RangesInDataParts & parts, const bool check_columns) +std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & parts) { std::vector per_part_sum_marks; Block sample_block = metadata_snapshot->getSampleBlock(); @@ -228,7 +202,7 @@ std::vector MergeTreeReadPool::fillPerPartInfo( per_part_sum_marks.push_back(sum_marks); - auto task_columns = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns); + auto task_columns = getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info); auto size_predictor = !predict_block_size_bytes ? nullptr : MergeTreeBaseSelectProcessor::getSizePredictor(part.data_part, task_columns, sample_block); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index 9949bdf86f8..3c7ee37b37e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -73,7 +73,7 @@ public: const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, - const bool check_columns_, const Names & column_names_, + const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, const bool do_not_steal_tasks_ = false); @@ -85,14 +85,10 @@ public: */ void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info); - /// This method tells which mark ranges we have to read if we start from @from mark range - MarkRanges getRestMarks(const IMergeTreeDataPart & part, const MarkRange & from) const; - Block getHeader() const; private: - std::vector fillPerPartInfo( - const RangesInDataParts & parts, const bool check_columns); + std::vector fillPerPartInfo(const RangesInDataParts & parts); void fillPerThreadInfo( const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index c898874f737..15c5795ee7b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -160,9 +160,10 @@ size_t MergeTreeReaderCompact::readRows(size_t from_mark, bool continue_reading, readData(column_from_part, column, from_mark, *column_positions[pos], rows_to_read, read_only_offsets[pos]); size_t 
read_rows_in_column = column->size() - column_size_before_reading; - if (read_rows_in_column < rows_to_read) - throw Exception("Cannot read all data in MergeTreeReaderCompact. Rows read: " + toString(read_rows_in_column) + - ". Rows expected: " + toString(rows_to_read) + ".", ErrorCodes::CANNOT_READ_ALL_DATA); + if (read_rows_in_column != rows_to_read) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data in MergeTreeReaderCompact. Rows read: {}. Rows expected: {}.", + read_rows_in_column, rows_to_read); } catch (Exception & e) { @@ -220,7 +221,7 @@ void MergeTreeReaderCompact::readData( serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, state); serialization->deserializeBinaryBulkWithMultipleStreams(temp_column, rows_to_read, deserialize_settings, state, nullptr); - auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), *temp_column); + auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), temp_column); /// TODO: Avoid extra copying. if (column->empty()) diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index fc57b48e86d..f225ecae8fa 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -10,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_READ_ALL_DATA; } @@ -76,6 +77,10 @@ MergeTreeReaderStream::MergeTreeReaderStream( if (max_mark_range_bytes != 0) read_settings = read_settings.adjustBufferSize(max_mark_range_bytes); + /// Empty buffer does not make progress. + if (!read_settings.local_fs_buffer_size || !read_settings.remote_fs_buffer_size) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read to empty buffer."); + /// Initialize the objects that shall be used to perform read operations. if (uncompressed_cache) { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 206469da7be..29cc45a5c60 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -69,10 +69,6 @@ size_t MergeTreeReaderWide::readRows(size_t from_mark, bool continue_reading, si size_t num_columns = columns.size(); checkNumberOfColumns(num_columns); - /// Pointers to offset columns that are common to the nested data structure columns. - /// If append is true, then the value will be equal to nullptr and will be used only to - /// check that the offsets column has been already read.
- OffsetColumns offset_columns; std::unordered_map caches; std::unordered_set prefetched_streams; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 98077605f89..203ce7a57d2 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -19,7 +19,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, - bool check_columns_, const MergeTreeReaderSettings & reader_settings_, const Names & virt_column_names_, size_t part_index_in_query_, @@ -35,7 +34,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( all_mark_ranges(std::move(mark_ranges_)), part_index_in_query(part_index_in_query_), has_limit_below_one_block(has_limit_below_one_block_), - check_columns(check_columns_), total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) { addTotalRowsApprox(total_rows); @@ -46,7 +44,7 @@ void MergeTreeSelectProcessor::initializeReaders() { task_columns = getReadTaskColumns( storage, metadata_snapshot, data_part, - required_columns, prewhere_info, check_columns); + required_columns, prewhere_info); /// Will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & column_names = task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index ea4cd349cba..f9b19f9f692 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -28,7 +28,6 @@ public: bool use_uncompressed_cache, const PrewhereInfoPtr & prewhere_info, ExpressionActionsSettings actions_settings, - bool check_columns_, const MergeTreeReaderSettings & reader_settings, const Names & virt_column_names = {}, size_t part_index_in_query_ = 0, @@ -66,7 +65,6 @@ protected: /// It reduces amount of read data for queries with small LIMIT. 
bool has_limit_below_one_block = false; - bool check_columns; size_t total_rows = 0; }; diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 4eb6bc4b2e2..6a8ef860c87 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -68,18 +68,16 @@ bool MergeTreeThreadSelectProcessor::getNewTask() if (!reader) { - auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); - if (use_uncompressed_cache) owned_uncompressed_cache = storage.getContext()->getUncompressedCache(); owned_mark_cache = storage.getContext()->getMarkCache(); - reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, + pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); } @@ -88,14 +86,13 @@ bool MergeTreeThreadSelectProcessor::getNewTask() /// in other case we can reuse readers, anyway they will be "seeked" to required mark if (part_name != last_readed_part_name) { - auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); /// retain avg_value_size_hints - reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, + pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, task->mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); } diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h index 7624dc303e0..4fec5ce46bc 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.h +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.h @@ -1,7 +1,7 @@ #pragma once -#include -#include +#include +#include #include #include #include diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 3f1a478dbc6..713f6a68612 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -81,9 +81,13 @@ std::pair MutateFromLogEntry stopwatch_ptr = std::make_unique(); + fake_query_context = Context::createCopy(storage.getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); + mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_mutated_part, metadata_snapshot, commands, merge_mutate_entry.get(), - entry.create_time, storage.getContext(), reserved_space, table_lock_holder); + entry.create_time, fake_query_context, reserved_space, table_lock_holder); /// Adjust priority for (auto & item : future_mutated_part->parts) diff 
--git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index 75368be30da..5709e7b808a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -46,6 +46,7 @@ private: MergeTreeData::MutableDataPartPtr new_part{nullptr}; FutureMergedMutatedPartPtr future_mutated_part{nullptr}; + ContextMutablePtr fake_query_context; MutateTaskPtr mutate_task; }; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 3359693fa22..6b602484ff3 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -51,9 +51,13 @@ void MutatePlainMergeTreeTask::prepare() merge_list_entry.get()); }; + fake_query_context = Context::createCopy(storage.getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); + mutate_task = storage.merger_mutator.mutatePartToTemporaryPart( future_part, metadata_snapshot, merge_mutate_entry->commands, merge_list_entry.get(), - time(nullptr), storage.getContext(), merge_mutate_entry->tagger->reserved_space, table_lock_holder); + time(nullptr), fake_query_context, merge_mutate_entry->tagger->reserved_space, table_lock_holder); } bool MutatePlainMergeTreeTask::executeStep() diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index 2a6c1545002..fb3c8318418 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -75,6 +75,7 @@ private: IExecutableTask::TaskResultCallback task_result_callback; + ContextMutablePtr fake_query_context; MutateTaskPtr mutate_task; }; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 40037c38779..36ce3f25744 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2,12 +2,12 @@ #include #include -#include -#include +#include +#include +#include +#include #include #include -#include -#include #include #include #include @@ -337,15 +337,14 @@ static NameToNameVector collectFilesForRenames( { /// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes. 
std::map stream_counts; - for (const NameAndTypePair & column : source_part->getColumns()) + for (const auto & column : source_part->getColumns()) { auto serialization = source_part->getSerializationForColumn(column); serialization->enumerateStreams( [&](const ISerialization::SubstreamPath & substream_path) { ++stream_counts[ISerialization::getFileNameForStream(column, substream_path)]; - }, - {}); + }); } NameToNameVector rename_vector; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 06856c73888..5731092f2a8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -62,7 +62,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() /// Both use relative_data_path which changes during rename, so we /// do it under share lock storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); + storage.clearOldTemporaryDirectories(storage.merger_mutator, storage.getSettings()->temporary_directories_lifetime.totalSeconds()); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 75308f872dc..e3ca902b1bd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -2,9 +2,9 @@ #include #include #include -#include #include #include +#include #include diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 23ea280e88c..729b545e9a0 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 0af395fd1bd..d312a7f9c3e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -141,10 +141,8 @@ IMergeTreeDataPart::Checksums checkDataPart( [&](const ISerialization::SubstreamPath & substream_path) { String projection_file_name = ISerialization::getFileNameForStream(projection_column, substream_path) + ".bin"; - checksums_data.files[projection_file_name] - = checksum_compressed_file(disk, projection_path + projection_file_name); - }, - {}); + checksums_data.files[projection_file_name] = checksum_compressed_file(disk, projection_path + projection_file_name); + }); } } @@ -221,7 +219,7 @@ IMergeTreeDataPart::Checksums checkDataPart( { String file_name = ISerialization::getFileNameForStream(column, substream_path) + ".bin"; checksums_data.files[file_name] = checksum_compressed_file(disk, path + file_name); - }, {}); + }); } } else diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 379573a381f..982acfe62a4 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -651,10 +651,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// single default partition with name "all". 
metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_key, metadata.columns, args.getContext()); - auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); - metadata.minmax_count_projection.emplace( - ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, args.getContext())); - /// PRIMARY KEY without ORDER BY is allowed and considered as ORDER BY. if (!args.storage_def->order_by && args.storage_def->primary_key) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); @@ -686,6 +682,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.primary_key.definition_ast = nullptr; } + auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); + auto primary_key_asts = metadata.primary_key.expression_list_ast->children; + metadata.minmax_count_projection.emplace( + ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, primary_key_asts, args.getContext())); + if (args.storage_def->sample_by) metadata.sampling_key = KeyDescription::getKeyFromAST(args.storage_def->sample_by->ptr(), metadata.columns, args.getContext()); @@ -736,10 +737,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata.columns, args.getContext()); - auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); - metadata.minmax_count_projection.emplace( - ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, args.getContext())); - ++arg_num; /// If there is an expression for sampling @@ -765,6 +762,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) ++arg_num; + auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); + auto primary_key_asts = metadata.primary_key.expression_list_ast->children; + metadata.minmax_count_projection.emplace( + ProjectionDescription::getMinMaxCountProjection(args.columns, minmax_columns, primary_key_asts, args.getContext())); + const auto * ast = engine_args[arg_num]->as(); if (ast && ast->value.getType() == Field::Types::UInt64) storage_settings->index_granularity = safeGet(ast->value); diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 917ea156ab7..fa36588513b 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index fdc30919ee7..947c0bbe932 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -3,8 +3,6 @@ #include "StorageMaterializedPostgreSQL.h" #include #include -#include -#include #include #include #include diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 5a99be7f38c..ff47866d587 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -7,7 +7,6 @@ #include #include #include -#include #include diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 873a4b4860c..3796bd8ba57 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ 
b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 0cd758cf49d..f771b2239ef 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -14,8 +14,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 42294b8152c..e5117a306ee 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include @@ -60,6 +60,7 @@ ProjectionDescription ProjectionDescription::clone() const other.metadata = metadata; other.key_size = key_size; other.is_minmax_count_projection = is_minmax_count_projection; + other.primary_key_max_column_name = primary_key_max_column_name; return other; } @@ -172,9 +173,15 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const return result; } -ProjectionDescription -ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & columns, const Names & minmax_columns, ContextPtr query_context) +ProjectionDescription ProjectionDescription::getMinMaxCountProjection( + const ColumnsDescription & columns, + const Names & minmax_columns, + const ASTs & primary_key_asts, + ContextPtr query_context) { + ProjectionDescription result; + result.is_minmax_count_projection = true; + auto select_query = std::make_shared(); ASTPtr select_expression_list = std::make_shared(); for (const auto & column : minmax_columns) @@ -182,10 +189,14 @@ ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & colum select_expression_list->children.push_back(makeASTFunction("min", std::make_shared(column))); select_expression_list->children.push_back(makeASTFunction("max", std::make_shared(column))); } + if (!primary_key_asts.empty()) + { + select_expression_list->children.push_back(makeASTFunction("min", primary_key_asts.front()->clone())); + select_expression_list->children.push_back(makeASTFunction("max", primary_key_asts.front()->clone())); + } select_expression_list->children.push_back(makeASTFunction("count")); select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list)); - ProjectionDescription result; result.definition_ast = select_query; result.name = MINMAX_COUNT_PROJECTION_NAME; result.query_ast = select_query->cloneToASTSelect(); @@ -196,6 +207,14 @@ ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & colum result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); + /// If we have primary key and it's not in minmax_columns, it will be used as one additional minmax columns. 
+ if (!primary_key_asts.empty() && result.sample_block.columns() == 2 * (minmax_columns.size() + 1) + 1) + { + /// min(p1), max(p1), min(p2), max(p2), ..., min(k1), max(k1), count() + /// ^ + /// size - 2 + result.primary_key_max_column_name = *(result.sample_block.getNames().cend() - 2); + } result.type = ProjectionDescription::Type::Aggregate; StorageInMemoryMetadata metadata; metadata.setColumns(ColumnsDescription(result.sample_block.getNamesAndTypesList())); @@ -203,7 +222,6 @@ ProjectionDescription::getMinMaxCountProjection(const ColumnsDescription & colum metadata.sorting_key = KeyDescription::buildEmptyKey(); metadata.primary_key = KeyDescription::buildEmptyKey(); result.metadata = std::make_shared(metadata); - result.is_minmax_count_projection = true; return result; } diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h index b9c11cb0771..7c254182ba4 100644 --- a/src/Storages/ProjectionsDescription.h +++ b/src/Storages/ProjectionsDescription.h @@ -58,12 +58,15 @@ struct ProjectionDescription bool is_minmax_count_projection = false; + /// If a primary key expression is used in the minmax_count projection, store the name of max expression. + String primary_key_max_column_name; + /// Parse projection from definition AST static ProjectionDescription getProjectionFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr query_context); - static ProjectionDescription - getMinMaxCountProjection(const ColumnsDescription & columns, const Names & minmax_columns, ContextPtr query_context); + static ProjectionDescription getMinMaxCountProjection( + const ColumnsDescription & columns, const Names & minmax_columns, const ASTs & primary_key_asts, ContextPtr query_context); ProjectionDescription() = default; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 34edd06d3e2..b954ad3ab23 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 0944a8f12d5..cf9b557de25 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.h b/src/Storages/ReadFinalForExternalReplicaStorage.h index f09a115919d..1be0aa0f4a6 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.h +++ b/src/Storages/ReadFinalForExternalReplicaStorage.h @@ -7,7 +7,7 @@ #if USE_MYSQL || USE_LIBPQXX #include -#include +#include namespace DB diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 1ab168f772f..18bf0e2c19b 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index b08818a2baa..fe7b22d331b 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -133,6 +133,7 @@ struct SelectQueryInfo { ASTPtr query; ASTPtr view_query; /// Optimized VIEW query + ASTPtr original_query; /// Unmodified query for projection analysis /// Cluster for the 
query. ClusterPtr cluster; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index c3ce70955bf..a74223054e9 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 59f250d67b8..3e8955ad864 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -2,7 +2,6 @@ #include #include -#include #include #include diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index c8bc215dd6c..51a73eb511e 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -8,8 +8,7 @@ #include #include #include -#include -#include +#include #include #include @@ -213,11 +212,20 @@ void StorageDictionary::renameInMemory(const StorageID & new_table_id) auto old_table_id = getStorageID(); IStorage::renameInMemory(new_table_id); - if (configuration) + bool has_configuration = false; { - configuration->setString("dictionary.database", new_table_id.database_name); - configuration->setString("dictionary.name", new_table_id.table_name); + std::lock_guard lock(dictionary_config_mutex); + if (configuration) + { + has_configuration = true; + configuration->setString("dictionary.database", new_table_id.database_name); + configuration->setString("dictionary.name", new_table_id.table_name); + } + } + + if (has_configuration) + { const auto & external_dictionaries_loader = getContext()->getExternalDictionariesLoader(); auto result = external_dictionaries_loader.getLoadResult(old_table_id.getInternalDictionaryName()); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b9c15e19c33..1dd75f09b92 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -4,7 +4,7 @@ #include -#include +#include #include #include @@ -59,7 +59,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageExecutable.cpp b/src/Storages/StorageExecutable.cpp index d6e242d1a97..16647d0b60f 100644 --- a/src/Storages/StorageExecutable.cpp +++ b/src/Storages/StorageExecutable.cpp @@ -3,7 +3,8 @@ #include #include -#include +#include + #include #include @@ -11,8 +12,7 @@ #include #include -#include -#include +#include #include #include #include @@ -113,9 +113,16 @@ Pipe StorageExecutable::read( { auto user_scripts_path = context->getUserScriptsPath(); auto script_path = user_scripts_path + '/' + script_name; - if (!std::filesystem::exists(std::filesystem::path(script_path))) + + if (!pathStartsWith(script_path, user_scripts_path)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Executable file {} does not exists inside {}", + "Executable file {} must be inside user scripts folder {}", + script_name, + user_scripts_path); + + if (!std::filesystem::exists(std::filesystem::path(script_path))) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Executable file {} does not exist inside user scripts folder {}", script_name, user_scripts_path); @@ -141,9 +148,9 @@ Pipe StorageExecutable::read( bool result = process_pool->tryBorrowObject(process, [&config, this]() { config.terminate_in_destructor_strategy = ShellCommand::DestructorStrategy{ true /*terminate_in_destructor*/, settings.command_termination_timeout }; - auto shell_command = ShellCommand::execute(config); + auto shell_command = ShellCommand::executeDirect(config); return shell_command; - 
}, settings.max_command_execution_time * 1000); + }, settings.max_command_execution_time * 10000); if (!result) throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, diff --git a/src/Storages/StorageExecutable.h b/src/Storages/StorageExecutable.h index 0d17e003ce4..74df17f1463 100644 --- a/src/Storages/StorageExecutable.h +++ b/src/Storages/StorageExecutable.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index 671bd808d2c..b4132edfec9 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index ec0bd5e5840..978d161852b 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -16,13 +16,13 @@ #include #include -#include #include #include #include #include #include +#include #include #include @@ -34,10 +34,10 @@ #include #include #include -#include #include +#include #include -#include +#include #include @@ -125,8 +125,8 @@ void checkCreationIsAllowed(ContextPtr context_global, const std::string & db_di return; /// "/dev/null" is allowed for perf testing - if (!startsWith(table_path, db_dir_path) && table_path != "/dev/null") - throw Exception("File is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); + if (!fileOrSymlinkPathStartsWith(table_path, db_dir_path) && table_path != "/dev/null") + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "File `{}` is not inside `{}`", table_path, db_dir_path); if (fs::exists(table_path) && fs::is_directory(table_path)) throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); @@ -141,7 +141,10 @@ Strings StorageFile::getPathsList(const String & table_path, const String & user fs_table_path = user_files_absolute_path / fs_table_path; Strings paths; - const String path = fs::weakly_canonical(fs_table_path); + /// Do not use fs::canonical or fs::weakly_canonical. + /// Otherwise it will not allow to work with symlinks in `user_files_path` directory. + String path = fs::absolute(fs_table_path); + path = fs::path(path).lexically_normal(); /// Normalize path. 
if (path.find_first_of("*?{") == std::string::npos) { std::error_code error; @@ -479,8 +482,6 @@ Pipe StorageFile::read( size_t max_block_size, unsigned num_streams) { - BlockInputStreams blocks_input; - if (use_table_fd) /// need to call ctr BlockInputStream paths = {""}; /// when use fd, paths are empty else diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index cfec328bbfc..a4dfbfc3f96 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index d707d7a6cdf..2ed7a77b59d 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -5,8 +5,7 @@ #include #include -#include -#include +#include namespace DB diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index bfb634c6bba..b28bc143bb0 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index c2c1cff5f53..8ac341661bf 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include /// toLower diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index c400cb01e29..54a8e4cd1ce 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -16,8 +16,6 @@ #include -#include - #include #include @@ -25,7 +23,7 @@ #include "StorageLogSettings.h" #include #include -#include +#include #include #include @@ -215,8 +213,7 @@ public: , storage(storage_) , metadata_snapshot(metadata_snapshot_) , lock(std::move(lock_)) - , marks_stream( - storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) + , marks_stream(storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Append)) { if (!lock) throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); @@ -408,7 +405,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c storage.files[stream_name].data_file_path, columns.getCodecOrDefault(name_and_type.name), storage.max_compress_block_size); - }, settings.path); + }); settings.getter = createStreamGetter(name_and_type, written_streams); @@ -429,7 +426,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c mark.offset = stream_it->second.plain_offset + stream_it->second.plain->count(); out_marks.emplace_back(file.column_index, mark); - }, settings.path); + }); serialization->serializeBinaryBulkWithMultipleStreams(column, 0, 0, settings, serialize_states[name]); @@ -443,7 +440,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c if (streams.end() == it) throw Exception("Logical error: stream was not created when writing data in LogBlockOutputStream", ErrorCodes::LOGICAL_ERROR); it->second.compressed.next(); - }, settings.path); + }); } @@ -629,13 +626,12 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta * If this is a data type with multiple stream, get the first stream, that we assume have real row count. * (Example: for Array data type, first stream is array sizes; and number of array sizes is the number of arrays). 
*/ - ISerialization::SubstreamPath substream_root_path; auto serialization = column.type->getDefaultSerialization(); serialization->enumerateStreams([&](const ISerialization::SubstreamPath & substream_path) { if (filename.empty()) filename = ISerialization::getFileNameForStream(column, substream_path); - }, substream_root_path); + }); Files::const_iterator it = files.find(filename); if (files.end() == it) @@ -752,9 +748,8 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; - ISerialization::SubstreamPath substream_path; auto serialization = column.type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } return column_sizes; diff --git a/src/Storages/StorageMaterializedMySQL.cpp b/src/Storages/StorageMaterializedMySQL.cpp index 52f53b9ceee..37a9838593b 100644 --- a/src/Storages/StorageMaterializedMySQL.cpp +++ b/src/Storages/StorageMaterializedMySQL.cpp @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 5436d306122..29dbd6d38ed 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -20,7 +19,6 @@ #include #include -#include #include #include #include diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 299e39a3836..37cb238ba0f 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 846fd4af5fd..063802faf1a 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -8,7 +8,6 @@ #include #include -#include #include diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ea42b48cace..38db0b61e8d 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include #include @@ -383,7 +383,7 @@ Pipe StorageMerge::createSources( { pipe = QueryPipelineBuilder::getPipe(InterpreterSelectQuery( modified_query_info.query, modified_context, - std::make_shared(header), + Pipe(std::make_shared(header)), SelectQueryOptions(processed_stage).analyze()).buildQueryPipeline()); pipe.addInterpreterContext(modified_context); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ab42da1dfa0..699c78cdab9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include @@ -105,7 +105,7 @@ void StorageMergeTree::startup() /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately - clearOldTemporaryDirectories(0); + clearOldTemporaryDirectories(merger_mutator, 0); /// NOTE background task will also do the above cleanups periodically. 
time_after_previous_cleanup_parts.restart(); @@ -924,12 +924,16 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( { try { + auto fake_query_context = Context::createCopy(getContext()); + fake_query_context->makeQueryContext(); + fake_query_context->setCurrentQueryId(""); MutationsInterpreter interpreter( - shared_from_this(), metadata_snapshot, commands_for_size_validation, getContext(), false); + shared_from_this(), metadata_snapshot, commands_for_size_validation, fake_query_context, false); commands_size += interpreter.evaluateCommandsSize(); } catch (...) { + tryLogCurrentException(log); MergeTreeMutationEntry & entry = it->second; entry.latest_fail_time = time(nullptr); entry.latest_fail_reason = getCurrentExceptionMessage(false); @@ -962,54 +966,6 @@ std::shared_ptr StorageMergeTree::selectPartsToMutate( return {}; } -bool StorageMergeTree::mutateSelectedPart(const StorageMetadataPtr & metadata_snapshot, MergeMutateSelectedEntry & merge_mutate_entry, TableLockHolder & table_lock_holder) -{ - auto & future_part = merge_mutate_entry.future_part; - - const Settings & settings = getContext()->getSettingsRef(); - auto merge_list_entry = getContext()->getMergeList().insert( - getStorageID(), future_part, - settings.memory_profiler_step, - settings.memory_profiler_sample_probability, - settings.max_untracked_memory); - Stopwatch stopwatch; - MutableDataPartPtr new_part; - - auto write_part_log = [&] (const ExecutionStatus & execution_status) - { - writePartLog( - PartLogElement::MUTATE_PART, - execution_status, - stopwatch.elapsed(), - future_part->name, - new_part, - future_part->parts, - merge_list_entry.get()); - }; - - try - { - auto task = merger_mutator.mutatePartToTemporaryPart( - future_part, metadata_snapshot, merge_mutate_entry.commands, merge_list_entry.get(), - time(nullptr), getContext(), merge_mutate_entry.tagger->reserved_space, table_lock_holder); - - new_part = executeHere(task); - - renameTempPartAndReplace(new_part); - - updateMutationEntriesErrors(future_part, true, ""); - write_part_log({}); - } - catch (...) 
- { - updateMutationEntriesErrors(future_part, false, getCurrentExceptionMessage(false)); - write_part_log(ExecutionStatus::fromCurrentException()); - throw; - } - - return true; -} - bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assignee) //-V657 { if (shutdown_called) @@ -1063,7 +1019,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign assignee.scheduleCommonTask(ExecutableLambdaAdapter::create( [this, share_lock] () { - clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); + clearOldTemporaryDirectories(merger_mutator, getSettings()->temporary_directories_lifetime.totalSeconds()); return true; }, common_assignee_trigger, getStorageID())); scheduled = true; diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index dd5ac26c3eb..db651bd00c3 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -178,7 +178,6 @@ private: std::shared_ptr selectPartsToMutate(const StorageMetadataPtr & metadata_snapshot, String * disable_reason, TableLockHolder & table_lock_holder); - bool mutateSelectedPart(const StorageMetadataPtr & metadata_snapshot, MergeMutateSelectedEntry & entry, TableLockHolder & table_lock_holder); Int64 getCurrentMutationVersion( const DataPartPtr & part, diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 47856645398..9721a5d1fc4 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -9,13 +9,11 @@ #include #include #include -#include #include #include #include -#include -#include #include +#include #include namespace DB diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index edd75078e49..b8d422d20db 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -4,12 +4,11 @@ #include #include -#include +#include #include #include #include #include -#include #include #include #include @@ -19,9 +18,8 @@ #include #include #include -#include #include -#include +#include #include #include diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 96e72c77f00..de667c1d75c 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/StoragePostgreSQL.h b/src/Storages/StoragePostgreSQL.h index b4bb5400930..10a60bf9b21 100644 --- a/src/Storages/StoragePostgreSQL.h +++ b/src/Storages/StoragePostgreSQL.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index c81ef6febdc..62e0ccc76aa 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include namespace DB diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 416d37cd351..ef8052de0ec 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -60,7 +60,6 @@ #include #include -#include #include @@ -479,7 +478,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. 
- clearOldTemporaryDirectories(0); + clearOldTemporaryDirectories(merger_mutator, 0); clearOldWriteAheadLogs(); } @@ -1016,8 +1015,15 @@ void StorageReplicatedMergeTree::setTableStructure( if (metadata_diff.sampling_expression_changed) { - auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); - new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); + if (!metadata_diff.new_sampling_expression.empty()) + { + auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); + new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); + } + else /// SAMPLE BY was removed + { + new_metadata.sampling_key = {}; + } } if (metadata_diff.skip_indices_changed) @@ -5996,12 +6002,12 @@ void StorageReplicatedMergeTree::replacePartitionFrom( MutableDataPartsVector dst_parts; Strings block_id_paths; Strings part_checksums; + auto zookeeper = getZooKeeper(); std::vector ephemeral_locks; LOG_DEBUG(log, "Cloning {} parts", src_all_parts.size()); static const String TMP_PREFIX = "tmp_replace_from_"; - auto zookeeper = getZooKeeper(); String alter_partition_version_path = zookeeper_path + "/alter_partition_version"; Coordination::Stat alter_partition_version_stat; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 65e07fa6144..bc03dbb45ae 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 7fb7b0e00d7..ec77bb6e1a1 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -31,12 +31,12 @@ #include -#include #include #include -#include +#include +#include -#include +#include #include #include @@ -53,8 +53,7 @@ #include #include -#include -#include +#include #include #include diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 33364557288..9d05235552c 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -22,15 +22,12 @@ #include #include #include -#include #include -#include -#include -#include -#include +#include +#include #include "Processors/Sources/SourceWithProgress.h" #include -#include +#include #include #include #include diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 32660cb1b1f..4e2c6cfbe10 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -3,7 +3,7 @@ #if USE_SQLITE #include #include -#include +#include #include #include #include @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 0042c95910e..e234eaf45f7 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -4,8 +4,9 @@ #include #include #include -#include -#include +#include +#include +#include #include #include #include @@ -218,7 +219,7 @@ void StorageSetOrJoinBase::restoreFromFile(const String & file_path) CompressedReadBuffer compressed_backup_buf(*backup_buf); NativeReader backup_stream(compressed_backup_buf, 0); - BlockStreamProfileInfo info; + ProfileInfo info; while (Block block = backup_stream.read()) { info.update(block); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 07c66c3034c..50113c391cc 100644 --- a/src/Storages/StorageStripeLog.cpp +++ 
b/src/Storages/StorageStripeLog.cpp @@ -14,9 +14,8 @@ #include #include -#include -#include -#include +#include +#include #include @@ -32,7 +31,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 557f378ab77..0b7ab30fa24 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index d1778342ec5..8657467226f 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -36,7 +36,7 @@ #include #include -#include +#include #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" @@ -448,9 +448,8 @@ void StorageTinyLog::addFiles(const NameAndTypePair & column) } }; - ISerialization::SubstreamPath substream_path; auto serialization = type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } @@ -544,9 +543,8 @@ IStorage::ColumnSizeByName StorageTinyLog::getColumnSizes() const size.data_compressed += file_sizes[fileName(it->second.data_file_path)]; }; - ISerialization::SubstreamPath substream_path; auto serialization = column.type->getDefaultSerialization(); - serialization->enumerateStreams(stream_callback, substream_path); + serialization->enumerateStreams(stream_callback); } return column_sizes; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 75ad2761362..174ee58ee42 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -13,14 +13,13 @@ #include #include -#include +#include -#include #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index ace5ca3667c..650782afbba 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index df774554365..9ffea587b97 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -14,7 +14,7 @@ #include -#include +#include #include #include #include diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index a1254e2aaeb..a90e21a2edb 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -1,6 +1,5 @@ #include "StorageXDBC.h" -#include #include #include #include @@ -8,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index c074659af2b..6897b4a3a79 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Storages/System/InformationSchema/columns.sql b/src/Storages/System/InformationSchema/columns.sql index 1b71ea58f57..80cf2f911be 100644 --- a/src/Storages/System/InformationSchema/columns.sql +++ b/src/Storages/System/InformationSchema/columns.sql @@ -23,6 +23,8 @@ ATTACH VIEW columns `domain_catalog` Nullable(String), `domain_schema` Nullable(String), `domain_name` Nullable(String), + `column_comment` String, + `column_type` String, `TABLE_CATALOG` String ALIAS table_catalog, 
`TABLE_SCHEMA` String ALIAS table_schema, `TABLE_NAME` String ALIAS table_name, @@ -45,7 +47,9 @@ ATTACH VIEW columns `COLLATION_NAME` Nullable(String) ALIAS collation_name, `DOMAIN_CATALOG` Nullable(String) ALIAS domain_catalog, `DOMAIN_SCHEMA` Nullable(String) ALIAS domain_schema, - `DOMAIN_NAME` Nullable(String) ALIAS domain_name + `DOMAIN_NAME` Nullable(String) ALIAS domain_name, + `COLUMN_COMMENT` String ALIAS column_comment, + `COLUMN_TYPE` String ALIAS column_type ) AS SELECT database AS table_catalog, @@ -70,5 +74,7 @@ SELECT NULL AS collation_name, NULL AS domain_catalog, NULL AS domain_schema, - NULL AS domain_name + NULL AS domain_name, + comment AS column_comment, + type AS column_type FROM system.columns diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 3a88cc96639..136c2489be2 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -4,7 +4,7 @@ #include #include -#include +#include #include namespace DB diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 7c28f897121..7558ae0ae92 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 254e6f77e0c..f32a609077f 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index d1456d72685..624fc54998c 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -1,7 +1,7 @@ #include #include -#include +#include #include #include diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 9aedee66b5f..f2b2102c7ff 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 639692beda5..532abb8e2f3 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index e7ecfc7c4f0..f86295cd06b 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include #include @@ -19,8 +18,8 @@ #include #include #include -#include -#include +#include +#include #if !defined(__clang__) # pragma GCC diagnostic push diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index afd81638da4..699ad698bd8 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -16,9 +16,6 @@ #include -#include - - namespace DB { diff --git a/src/TableFunctions/TableFunctionExecutable.h b/src/TableFunctions/TableFunctionExecutable.h index 05ef2b3b26b..128ee8e46fc 100644 --- 
a/src/TableFunctions/TableFunctionExecutable.h +++ b/src/TableFunctions/TableFunctionExecutable.h @@ -1,6 +1,5 @@ #pragma once -#include #include namespace DB diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 005a689f895..80f108eb68a 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 160fc3c2468..7e28decfdb0 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include @@ -21,7 +21,6 @@ #include #include #include -#include #include "registerTableFunctions.h" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 132909438da..c9858910837 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,15 +7,19 @@ else () include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake) endif () -install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -install ( - DIRECTORY queries performance config - DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test - USE_SOURCE_PERMISSIONS - COMPONENT clickhouse - PATTERN "CMakeLists.txt" EXCLUDE - PATTERN ".gitignore" EXCLUDE -) +option (ENABLE_CLICKHOUSE_TEST "Install clickhouse-test script and relevant tests scenarios" ON) + +if (ENABLE_CLICKHOUSE_TEST) + install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + install ( + DIRECTORY queries performance config + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test + USE_SOURCE_PERMISSIONS + COMPONENT clickhouse + PATTERN "CMakeLists.txt" EXCLUDE + PATTERN ".gitignore" EXCLUDE + ) +endif () if (ENABLE_TEST_INTEGRATION) add_subdirectory (integration) diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py new file mode 100644 index 00000000000..f095b04872b --- /dev/null +++ b/tests/ci/compress_files.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +import subprocess +import logging +import os + +def compress_file_fast(path, archive_path): + if os.path.exists('/usr/bin/pigz'): + subprocess.check_call("pigz < {} > {}".format(path, archive_path), shell=True) + else: + subprocess.check_call("gzip < {} > {}".format(path, archive_path), shell=True) + + +def compress_fast(path, archive_path, exclude=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster") + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, compressing with default tar") + + if exclude is None: + exclude_part = "" + elif isinstance(exclude, list): + exclude_part = " ".join(["--exclude {}".format(x) for x in exclude]) + else: + exclude_part = "--exclude {}".format(str(exclude)) + + fname = os.path.basename(path) + if os.path.isfile(path): + path = os.path.dirname(path) + else: + path += "/.." 
+ cmd = "tar {} {} -cf {} -C {} {}".format(pigz_part, exclude_part, archive_path, path, fname) + logging.debug("compress_fast cmd:{}".format(cmd)) + subprocess.check_call(cmd, shell=True) + + +def decompress_fast(archive_path, result_path=None): + pigz_part = '' + if os.path.exists('/usr/bin/pigz'): + logging.info("pigz found, will compress and decompress faster ('{}' -> '{}')".format(archive_path, result_path)) + pigz_part = "--use-compress-program='pigz'" + else: + pigz_part = '-z' + logging.info("no pigz, decompressing with default tar ('{}' -> '{}')".format(archive_path, result_path)) + + if result_path is None: + subprocess.check_call("tar {} -xf {}".format(pigz_part, archive_path), shell=True) + else: + subprocess.check_call("tar {} -xf {} -C {}".format(pigz_part, archive_path, result_path), shell=True) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py new file mode 100644 index 00000000000..9bd3f431429 --- /dev/null +++ b/tests/ci/docker_images_check.py @@ -0,0 +1,231 @@ +#!/usr/bin/env python3 +import subprocess +import logging +from report import create_test_html_report +from s3_helper import S3Helper +import json +import os +from pr_info import PRInfo +from github import Github +import shutil +from get_robot_token import get_best_robot_token, get_parameter_from_ssm + +NAME = "Push to Dockerhub (actions)" + +def get_changed_docker_images(pr_info, repo_path, image_file_path): + images_dict = {} + path_to_images_file = os.path.join(repo_path, image_file_path) + if os.path.exists(path_to_images_file): + with open(path_to_images_file, 'r') as dict_file: + images_dict = json.load(dict_file) + else: + logging.info("Image file %s doesnt exists in repo %s", image_file_path, repo_path) + + dockerhub_repo_name = 'yandex' + if not images_dict: + return [], dockerhub_repo_name + + files_changed = pr_info.changed_files + + logging.info("Changed files for PR %s @ %s: %s", pr_info.number, pr_info.sha, str(files_changed)) + + changed_images = [] + + for dockerfile_dir, image_description in images_dict.items(): + if image_description['name'].startswith('clickhouse/'): + dockerhub_repo_name = 'clickhouse' + + for f in files_changed: + if f.startswith(dockerfile_dir): + logging.info( + "Found changed file '%s' which affects docker image '%s' with path '%s'", + f, image_description['name'], dockerfile_dir) + changed_images.append(dockerfile_dir) + break + + # The order is important: dependents should go later than bases, so that + # they are built with updated base versions. + index = 0 + while index < len(changed_images): + image = changed_images[index] + for dependent in images_dict[image]['dependent']: + logging.info( + "Marking docker image '%s' as changed because it depends on changed docker image '%s'", + dependent, image) + changed_images.append(dependent) + index += 1 + if index > 100: + # Sanity check to prevent infinite loop. + raise "Too many changed docker images, this is a bug." + str(changed_images) + + # If a dependent image was already in the list because its own files + # changed, but then it was added as a dependent of a changed base, we + # must remove the earlier entry so that it doesn't go earlier than its + # base. This way, the dependent will be rebuilt later than the base, and + # will correctly use the updated version of the base. 
+ seen = set() + no_dups_reversed = [] + for x in reversed(changed_images): + if x not in seen: + seen.add(x) + no_dups_reversed.append(x) + + result = [(x, images_dict[x]['name']) for x in reversed(no_dups_reversed)] + logging.info("Changed docker images for PR %s @ %s: '%s'", pr_info.number, pr_info.sha, result) + return result, dockerhub_repo_name + +def build_and_push_one_image(path_to_dockerfile_folder, image_name, version_string): + logging.info("Building docker image %s with version %s from path %s", image_name, version_string, path_to_dockerfile_folder) + build_log = None + push_log = None + with open('build_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im}:{ver} {path}".format(im=image_name, ver=version_string, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + with open('tag_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker build --network=host -t {im} {path}".format(im=image_name, path=path_to_dockerfile_folder) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + build_log = str(pl.name) + if retcode != 0: + return False, build_log, None + + logging.info("Pushing image %s to dockerhub", image_name) + + with open('push_log_' + str(image_name).replace('/', '_') + "_" + version_string, 'w') as pl: + cmd = "docker push {im}:{ver}".format(im=image_name, ver=version_string) + retcode = subprocess.Popen(cmd, shell=True, stderr=pl, stdout=pl).wait() + push_log = str(pl.name) + if retcode != 0: + return False, build_log, push_log + + logging.info("Processing of %s successfully finished", image_name) + return True, build_log, push_log + +def process_single_image(versions, path_to_dockerfile_folder, image_name): + logging.info("Image will be pushed with versions %s", ', '.join(versions)) + result = [] + for ver in versions: + for i in range(5): + success, build_log, push_log = build_and_push_one_image(path_to_dockerfile_folder, image_name, ver) + if success: + result.append((image_name + ":" + ver, build_log, push_log, 'OK')) + break + logging.info("Got error will retry %s time and sleep for %s seconds", i, i * 5) + time.sleep(i * 5) + else: + result.append((image_name + ":" + ver, build_log, push_log, 'FAIL')) + + logging.info("Processing finished") + return result + + +def process_test_results(s3_client, test_results, s3_path_prefix): + overall_status = 'success' + processed_test_results = [] + for image, build_log, push_log, status in test_results: + if status != 'OK': + overall_status = 'failure' + url_part = '' + if build_log is not None and os.path.exists(build_log): + build_url = s3_client.upload_test_report_to_s3( + build_log, + s3_path_prefix + "/" + os.path.basename(build_log)) + url_part += 'build_log'.format(build_url) + if push_log is not None and os.path.exists(push_log): + push_url = s3_client.upload_test_report_to_s3( + push_log, + s3_path_prefix + "/" + os.path.basename(push_log)) + if url_part: + url_part += ', ' + url_part += 'push_log'.format(push_url) + if url_part: + test_name = image + ' (' + url_part + ')' + else: + test_name = image + processed_test_results.append((test_name, status)) + return overall_status, processed_test_results + +def upload_results(s3_client, pr_number, commit_sha, test_results): + s3_path_prefix = f"{pr_number}/{commit_sha}/" + NAME.lower().replace(' ', '_') + + 
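process_single_image above retries each build-and-push up to five times with a growing pause between attempts. A minimal sketch of that retry-with-backoff shape, with push_once standing in for build_and_push_one_image; note that the pause relies on time.sleep, so the time module needs an explicit import next to the others at the top of the script:

    import time

    def push_once(tag):
        # Stand-in for build_and_push_one_image(); pretend the first two attempts fail.
        push_once.calls = getattr(push_once, "calls", 0) + 1
        return push_once.calls >= 3

    def push_with_retries(tag, attempts=5):
        for i in range(attempts):
            if push_once(tag):
                return True
            # Back off a little longer after every failed attempt (the script uses i * 5 seconds).
            time.sleep(i * 5)
        return False

    print(push_with_retries("clickhouse/style-test:123"))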
branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + html_report = create_test_html_report(NAME, test_results, "https://hub.docker.com/u/clickhouse", task_url, branch_url, branch_name, commit_url) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'docker_images_check') + dockerhub_password = get_parameter_from_ssm('dockerhub_robot_password') + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, False, True) + changed_images, dockerhub_repo_name = get_changed_docker_images(pr_info, repo_path, "docker/images.json") + logging.info("Has changed images %s", ', '.join([str(image[0]) for image in changed_images])) + pr_commit_version = str(pr_info.number) + '-' + pr_info.sha + + versions = [str(pr_info.number), pr_commit_version] + + subprocess.check_output("docker login --username 'robotclickhouse' --password '{}'".format(dockerhub_password), shell=True) + + result_images = {} + images_processing_result = [] + for rel_path, image_name in changed_images: + full_path = os.path.join(repo_path, rel_path) + images_processing_result += process_single_image(versions, full_path, image_name) + result_images[image_name] = pr_commit_version + + if len(changed_images): + description = "Updated " + ','.join([im[1] for im in changed_images]) + else: + description = "Nothing to update" + + + if len(description) >= 140: + description = description[:136] + "..." 
+ + s3_helper = S3Helper('https://s3.amazonaws.com') + + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + status, test_results = process_test_results(s3_helper, images_processing_result, s3_path_prefix) + + url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results) + + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=url) + + with open(os.path.join(temp_path, 'changed_images.json'), 'w') as images_file: + json.dump(result_images, images_file) + + print("::notice ::Report url: {}".format(url)) + print("::set-output name=url_output::\"{}\"".format(url)) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py new file mode 100644 index 00000000000..db405cf8f73 --- /dev/null +++ b/tests/ci/finish_check.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +import logging +from github import Github +from pr_info import PRInfo +import json +import os +from get_robot_token import get_best_robot_token + +NAME = 'Run Check (actions)' + +def filter_statuses(statuses): + """ + Squash statuses to latest state + 1. context="first", state="success", update_time=1 + 2. context="second", state="success", update_time=2 + 3. context="first", stat="failure", update_time=3 + =========> + 1. context="second", state="success" + 2. context="first", stat="failure" + """ + filt = {} + for status in sorted(statuses, key=lambda x: x.updated_at): + filt[status.context] = status + return filt + + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + statuses = filter_statuses(list(commit.get_statuses())) + if NAME in statuses and statuses[NAME].state == "pending": + commit.create_status(context=NAME, description="All checks finished", state="success", target_url=url) diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py new file mode 100644 index 00000000000..db37ee311c5 --- /dev/null +++ b/tests/ci/get_robot_token.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python3 +import boto3 +from github import Github + +def get_parameter_from_ssm(name, decrypt=True, client=None): + if not client: + client = boto3.client('ssm', region_name='us-east-1') + return client.get_parameter(Name=name, WithDecryption=decrypt)['Parameter']['Value'] + +def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): + client = boto3.client('ssm', region_name='us-east-1') + tokens = {} + for i in range(1, total_tokens + 1): + token_name = token_prefix_env_name + str(i) + token = get_parameter_from_ssm(token_name, True, client) + gh = Github(token) + rest, _ = gh.rate_limiting + tokens[token] = rest + + return max(tokens.items(), key=lambda x: x[1])[0] diff --git a/tests/ci/metrics_lambda/Dockerfile b/tests/ci/metrics_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/metrics_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py 
${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/metrics_lambda/app.py b/tests/ci/metrics_lambda/app.py new file mode 100644 index 00000000000..d2fb048638b --- /dev/null +++ b/tests/ci/metrics_lambda/app.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = [] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', 
MetricData=metrics_data) + +def main(github_secret_key, github_app_id, push_to_cloudwatch): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runners = list_runners(access_token) + if push_to_cloudwatch: + push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + else: + print(runners) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-cloudwatch', action='store_true', help='Store received token in parameter store') + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_cloudwatch) diff --git a/tests/ci/metrics_lambda/requirements.txt b/tests/ci/metrics_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/metrics_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py new file mode 100644 index 00000000000..8feedb2d4d7 --- /dev/null +++ b/tests/ci/pr_info.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import requests +import json +import os +import subprocess +import urllib +from unidiff import PatchSet + + +class PRInfo: + def __init__(self, github_event, need_orgs=False, need_changed_files=False): + self.number = github_event['number'] + if 'after' in github_event: + self.sha = github_event['after'] + else: + self.sha = github_event['pull_request']['head']['sha'] + + self.labels = set([l['name'] for l in github_event['pull_request']['labels']]) + self.user_login = github_event['pull_request']['user']['login'] + self.user_orgs = set([]) + if need_orgs: + user_orgs_response = requests.get(github_event['pull_request']['user']['organizations_url']) + if user_orgs_response.ok: + response_json = user_orgs_response.json() + self.user_orgs = set(org['id'] for org in response_json) + + self.changed_files = set([]) + if need_changed_files: + diff_url = github_event['pull_request']['diff_url'] + diff = urllib.request.urlopen(github_event['pull_request']['diff_url']) + diff_object = PatchSet(diff, diff.headers.get_charsets()[0]) + self.changed_files = set([f.path for f in diff_object]) + + def get_dict(self): + return { + 'sha': self.sha, + 'number': self.number, + 'labels': self.labels, + 'user_login': self.user_login, + 'user_orgs': self.user_orgs, + } diff --git a/tests/ci/pvs_check.py b/tests/ci/pvs_check.py new file mode 100644 index 00000000000..f8b1b58f307 --- /dev/null +++ b/tests/ci/pvs_check.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +import subprocess +import os +import json +import logging +from github import Github 
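PRInfo above derives changed_files by downloading the pull request's .diff and parsing it with unidiff. A minimal sketch of that step with a placeholder URL; since urllib.request is a submodule, importing it explicitly is the safe form (a bare import urllib does not guarantee the submodule is loaded):

    import urllib.request
    from unidiff import PatchSet

    diff_url = "https://github.com/ClickHouse/ClickHouse/pull/12345.diff"  # placeholder PR number

    response = urllib.request.urlopen(diff_url)
    # PatchSet accepts the file-like response plus the charset reported by the server.
    patch = PatchSet(response, response.headers.get_charsets()[0])
    changed_files = {patched_file.path for patched_file in patch}
    print(sorted(changed_files))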
+from report import create_test_html_report +from s3_helper import S3Helper +from pr_info import PRInfo +import shutil +import sys +from get_robot_token import get_best_robot_token + +NAME = 'PVS Studio (actions)' +LICENCE_NAME = 'Free license: ClickHouse, Yandex' +HTML_REPORT_FOLDER = 'pvs-studio-html-report' +TXT_REPORT_NAME = 'pvs-studio-task-report.txt' + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + +def _process_txt_report(path): + warnings = [] + errors = [] + with open(path, 'r') as report_file: + for line in report_file: + if 'viva64' in line: + continue + elif 'warn' in line: + warnings.append(':'.join(line.split('\t')[0:2])) + elif 'err' in line: + errors.append(':'.join(line.split('\t')[0:2])) + return warnings, errors + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): + s3_path_prefix = str(pr_number) + "/" + commit_sha + "/" + NAME.lower().replace(' ', '_') + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.path.join(os.getenv("REPO_COPY", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'pvs_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + # this check modify repository so copy it to the temp directory + logging.info("Repo copy path %s", repo_path) + + gh = Github(get_best_robot_token()) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/pvs-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/pvs-test' in images: + docker_image += ':' + images['clickhouse/pvs-test'] + + logging.info("Got docker image %s", docker_image) + + s3_helper = S3Helper('https://s3.amazonaws.com') + + licence_key = os.getenv('PVS_STUDIO_KEY') + cmd = f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --volume={repo_path}:/repo_folder --volume={temp_path}:/test_output -e LICENCE_NAME='{LICENCE_NAME}' -e LICENCE_KEY='{licence_key}' {docker_image}" + commit = get_commit(gh, 
pr_info.sha) + + try: + subprocess.check_output(cmd, shell=True) + except: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + try: + s3_path_prefix = str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(' ', '_') + html_urls = s3_helper.upload_test_folder_to_s3(os.path.join(temp_path, HTML_REPORT_FOLDER), s3_path_prefix) + index_html = None + + for url in html_urls: + if 'index.html' in url: + index_html = 'HTML report'.format(url) + break + + if not index_html: + commit.create_status(context=NAME, description='PVS report failed to build', state='failure', target_url=f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}") + sys.exit(1) + + txt_report = os.path.join(temp_path, TXT_REPORT_NAME) + warnings, errors = _process_txt_report(txt_report) + errors = errors + warnings + + status = 'success' + test_results = [(index_html, "Look at the report"), ("Errors count not checked", "OK")] + description = "Total errors {}".format(len(errors)) + additional_logs = [txt_report, os.path.join(temp_path, 'pvs-studio.log')] + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_logs) + + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=status, target_url=report_url) + except Exception as ex: + print("Got an exception", ex) + sys.exit(1) diff --git a/tests/ci/report.py b/tests/ci/report.py new file mode 100644 index 00000000000..5c9b174599d --- /dev/null +++ b/tests/ci/report.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +import os +import datetime + +### FIXME: BEST FRONTEND PRACTICIES BELOW + +HTML_BASE_TEST_TEMPLATE = """ + + + + {title} + + +
+ {header}
+ +{test_part} + + + +""" + +HTML_TEST_PART = """ + + +{headers} + +{rows} +
+""" + +BASE_HEADERS = ['Test name', 'Test status'] + + +def _format_header(header, branch_name, branch_url=None): + result = ' '.join([w.capitalize() for w in header.split(' ')]) + result = result.replace("Clickhouse", "ClickHouse") + result = result.replace("clickhouse", "ClickHouse") + if 'ClickHouse' not in result: + result = 'ClickHouse ' + result + result += ' for ' + if branch_url: + result += '{name}'.format(url=branch_url, name=branch_name) + else: + result += branch_name + return result + + +def _get_status_style(status): + style = "font-weight: bold;" + if status in ('OK', 'success', 'PASSED'): + style += 'color: #0A0;' + elif status in ('FAIL', 'failure', 'error', 'FAILED', 'Timeout'): + style += 'color: #F00;' + else: + style += 'color: #FFB400;' + return style + + +def _get_html_url(url): + if isinstance(url, str): + return '{name}'.format(url=url, name=os.path.basename(url)) + if isinstance(url, tuple): + return '{name}'.format(url=url[0], name=url[1]) + return '' + + +def create_test_html_report(header, test_result, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls=[]): + if test_result: + rows_part = "" + num_fails = 0 + has_test_time = False + has_test_logs = False + for result in test_result: + test_name = result[0] + test_status = result[1] + + test_logs = None + test_time = None + if len(result) > 2: + test_time = result[2] + has_test_time = True + + if len(result) > 3: + test_logs = result[3] + has_test_logs = True + + row = "" + row += "" + test_name + "" + style = _get_status_style(test_status) + + # Allow to quickly scroll to the first failure. + is_fail = test_status == "FAIL" or test_status == 'FLAKY' + is_fail_id = "" + if is_fail: + num_fails = num_fails + 1 + is_fail_id = 'id="fail' + str(num_fails) + '" ' + + row += ''.format(style) + test_status + "" + + if test_time is not None: + row += "" + test_time + "" + + if test_logs is not None: + test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + row += "" + test_logs_html + "" + + row += "" + rows_part += row + + headers = BASE_HEADERS + if has_test_time: + headers.append('Test time, sec.') + if has_test_logs: + headers.append('Logs') + + headers = ''.join(['' + h + '' for h in headers]) + test_part = HTML_TEST_PART.format(headers=headers, rows=rows_part) + else: + test_part = "" + + additional_html_urls = "" + for url in additional_urls: + additional_html_urls += ' ' + _get_html_url(url) + + result = HTML_BASE_TEST_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + raw_log_name=os.path.basename(raw_log_url), + raw_log_url=raw_log_url, + task_url=task_url, + test_part=test_part, + branch_name=branch_name, + commit_url=commit_url, + additional_urls=additional_html_urls + ) + return result + + +HTML_BASE_BUILD_TEMPLATE = """ + + + + +{title} + + +
+ {header}
+ Compiler | Build type | Sanitizer | Bundled | Splitted | Status | Build log | Build time | Artifacts
+ {rows}
+ + + +""" + +LINK_TEMPLATE = '{text}' + + +def create_build_html_report(header, build_results, build_logs_urls, artifact_urls_list, task_url, branch_url, branch_name, commit_url): + rows = "" + for (build_result, build_log_url, artifact_urls) in zip(build_results, build_logs_urls, artifact_urls_list): + row = "" + row += "{}".format(build_result.compiler) + if build_result.build_type: + row += "{}".format(build_result.build_type) + else: + row += "{}".format("relwithdebuginfo") + if build_result.sanitizer: + row += "{}".format(build_result.sanitizer) + else: + row += "{}".format("none") + + row += "{}".format(build_result.bundled) + row += "{}".format(build_result.splitted) + + if build_result.status: + style = _get_status_style(build_result.status) + row += '{}'.format(style, build_result.status) + else: + style = _get_status_style("error") + row += '{}'.format(style, "error") + + row += 'link'.format(build_log_url) + + if build_result.elapsed_seconds: + delta = datetime.timedelta(seconds=build_result.elapsed_seconds) + else: + delta = 'unknown' + + row += '{}'.format(str(delta)) + + links = "" + link_separator = "
" + if artifact_urls: + for artifact_url in artifact_urls: + links += LINK_TEMPLATE.format(text=os.path.basename(artifact_url), url=artifact_url) + links += link_separator + if links: + links = links[:-len(link_separator)] + row += "{}".format(links) + + row += "" + rows += row + return HTML_BASE_BUILD_TEMPLATE.format( + title=_format_header(header, branch_name), + header=_format_header(header, branch_name, branch_url), + rows=rows, + task_url=task_url, + branch_name=branch_name, + commit_url=commit_url) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py new file mode 100644 index 00000000000..e6bc7259330 --- /dev/null +++ b/tests/ci/run_check.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python3 +import os +import json +import requests +from pr_info import PRInfo +import sys +import logging +from github import Github +from get_robot_token import get_best_robot_token + +NAME = 'Run Check (actions)' + +TRUSTED_ORG_IDS = { + 7409213, # yandex + 28471076, # altinity + 54801242, # clickhouse +} + +OK_TEST_LABEL = set(["can be tested", "release", "pr-documentation", "pr-doc-fix"]) +DO_NOT_TEST_LABEL = "do not test" + +# Individual trusted contirbutors who are not in any trusted organization. +# Can be changed in runtime: we will append users that we learned to be in +# a trusted org, to save GitHub API calls. +TRUSTED_CONTRIBUTORS = { + "achimbab", + "adevyatova ", # DOCSUP + "Algunenano", # Raúl Marín, Tinybird + "AnaUvarova", # DOCSUP + "anauvarova", # technical writer, Yandex + "annvsh", # technical writer, Yandex + "atereh", # DOCSUP + "azat", + "bharatnc", # Newbie, but already with many contributions. + "bobrik", # Seasoned contributor, CloundFlare + "BohuTANG", + "damozhaeva", # DOCSUP + "den-crane", + "gyuton", # DOCSUP + "gyuton", # technical writer, Yandex + "hagen1778", # Roman Khavronenko, seasoned contributor + "hczhcz", + "hexiaoting", # Seasoned contributor + "ildus", # adjust, ex-pgpro + "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto + "ka1bi4", # DOCSUP + "kirillikoff", # DOCSUP + "kitaisreal", # Seasoned contributor + "kreuzerkrieg", + "lehasm", # DOCSUP + "michon470", # DOCSUP + "MyroTk", # Tester in Altinity + "myrrc", # Michael Kot, Altinity + "nikvas0", + "nvartolomei", + "olgarev", # DOCSUP + "otrazhenia", # Yandex docs contractor + "pdv-ru", # DOCSUP + "podshumok", # cmake expert from QRator Labs + "s-mx", # Maxim Sabyanin, former employee, present contributor + "sevirov", # technical writer, Yandex + "spongedu", # Seasoned contributor + "ucasFL", # Amos Bird's friend + "vdimir", # Employee + "vzakaznikov", + "YiuRULE", + "zlobober" # Developer of YT +} + + +def pr_is_by_trusted_user(pr_user_login, pr_user_orgs): + if pr_user_login in TRUSTED_CONTRIBUTORS: + logging.info("User '{}' is trusted".format(pr_user_login)) + return True + + logging.info("User '{}' is not trusted".format(pr_user_login)) + + for org_id in pr_user_orgs: + if org_id in TRUSTED_ORG_IDS: + logging.info("Org '{}' is trusted; will mark user {} as trusted".format(org_id, pr_user_login)) + return True + logging.info("Org '{}' is not trusted".format(org_id)) + + return False + +# Returns whether we should look into individual checks for this PR. If not, it +# can be skipped entirely. +def should_run_checks_for_pr(pr_info): + # Consider the labels and whether the user is trusted. 
+ force_labels = set(['force tests']).intersection(pr_info.labels) + if force_labels: + return True, "Labeled '{}'".format(', '.join(force_labels)) + + if 'do not test' in pr_info.labels: + return False, "Labeled 'do not test'" + + if 'can be tested' not in pr_info.labels and not pr_is_by_trusted_user(pr_info.user_login, pr_info.user_orgs): + return False, "Needs 'can be tested' label" + + if 'release' in pr_info.labels or 'pr-backport' in pr_info.labels or 'pr-cherrypick' in pr_info.labels: + return False, "Don't try new checks for release/backports/cherry-picks" + + return True, "No special conditions apply" + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + + pr_info = PRInfo(event, need_orgs=True) + can_run, description = should_run_checks_for_pr(pr_info) + gh = Github(get_best_robot_token()) + commit = get_commit(gh, pr_info.sha) + url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + if not can_run: + print("::notice ::Cannot run") + commit.create_status(context=NAME, description=description, state="failure", target_url=url) + sys.exit(1) + else: + print("::notice ::Can run") + commit.create_status(context=NAME, description=description, state="pending", target_url=url) diff --git a/tests/ci/s3_helper.py b/tests/ci/s3_helper.py new file mode 100644 index 00000000000..3c930f26634 --- /dev/null +++ b/tests/ci/s3_helper.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +import hashlib +import logging +import os +import boto3 +from botocore.exceptions import ClientError, BotoCoreError +from multiprocessing.dummy import Pool +from compress_files import compress_file_fast +from get_robot_token import get_parameter_from_ssm + +def _md5(fname): + hash_md5 = hashlib.md5() + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + logging.debug("MD5 for {} is {}".format(fname, hash_md5.hexdigest())) + return hash_md5.hexdigest() + + +def _flatten_list(lst): + result = [] + for elem in lst: + if isinstance(elem, list): + result += _flatten_list(elem) + else: + result.append(elem) + return result + + +class S3Helper(object): + def __init__(self, host): + self.session = boto3.session.Session(region_name='us-east-1') + self.client = self.session.client('s3', endpoint_url=host) + + def _upload_file_to_s3(self, bucket_name, file_path, s3_path): + logging.debug("Start uploading {} to bucket={} path={}".format(file_path, bucket_name, s3_path)) + metadata = {} + if os.path.getsize(file_path) < 64 * 1024 * 1024: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + metadata['ContentType'] = "text/plain; charset=utf-8" + logging.info("Content type %s for file path %s", "text/plain; charset=utf-8", file_path) + elif s3_path.endswith("html"): + metadata['ContentType'] = "text/html; charset=utf-8" + logging.info("Content type %s for file path %s", "text/html; charset=utf-8", file_path) + else: + logging.info("No content type provied for %s", file_path) + else: + if s3_path.endswith("txt") or s3_path.endswith("log") or s3_path.endswith("err") or s3_path.endswith("out"): + logging.info("Going to compress file log file %s to %s", file_path, file_path + ".gz") + compress_file_fast(file_path, 
file_path + ".gz") + file_path += ".gz" + s3_path += ".gz" + else: + logging.info("Processing file without compression") + logging.info("File is too large, do not provide content type") + + self.client.upload_file(file_path, bucket_name, s3_path, ExtraArgs=metadata) + logging.info("Upload {} to {}. Meta: {}".format(file_path, s3_path, metadata)) + return "https://s3.amazonaws.com/{bucket}/{path}".format(bucket=bucket_name, path=s3_path) + + def upload_test_report_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-test-reports', file_path, s3_path) + + def upload_build_file_to_s3(self, file_path, s3_path): + return self._upload_file_to_s3('clickhouse-builds', file_path, s3_path) + + def _upload_folder_to_s3(self, folder_path, s3_folder_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks): + logging.info("Upload folder '{}' to bucket={} of s3 folder '{}'".format(folder_path, bucket_name, s3_folder_path)) + if not os.path.exists(folder_path): + return [] + files = os.listdir(folder_path) + if not files: + return [] + + p = Pool(min(len(files), 5)) + + def task(file_name): + full_fs_path = os.path.join(folder_path, file_name) + if keep_dirs_in_s3_path: + full_s3_path = s3_folder_path + "/" + os.path.basename(folder_path) + else: + full_s3_path = s3_folder_path + + if os.path.isdir(full_fs_path): + return self._upload_folder_to_s3(full_fs_path, full_s3_path, bucket_name, keep_dirs_in_s3_path, upload_symlinks) + + if os.path.islink(full_fs_path): + if upload_symlinks: + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + return [] + + return self._upload_file_to_s3(bucket_name, full_fs_path, full_s3_path + "/" + file_name) + + return sorted(_flatten_list(list(p.map(task, files)))) + + def upload_build_folder_to_s3(self, folder_path, s3_folder_path, keep_dirs_in_s3_path=True, upload_symlinks=True): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-builds', keep_dirs_in_s3_path, upload_symlinks) + + def upload_test_folder_to_s3(self, folder_path, s3_folder_path): + return self._upload_folder_to_s3(folder_path, s3_folder_path, 'clickhouse-test-reports', True, True) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py new file mode 100644 index 00000000000..4a8cde70bc2 --- /dev/null +++ b/tests/ci/style_check.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +from github import Github +from report import create_test_html_report +import shutil +import logging +import subprocess +import os +import csv +from s3_helper import S3Helper +import time +import json +from pr_info import PRInfo +from get_robot_token import get_best_robot_token + +NAME = "Style Check (actions)" + + +def process_logs(s3_client, additional_logs, s3_path_prefix): + additional_urls = [] + for log_path in additional_logs: + if log_path: + additional_urls.append( + s3_client.upload_test_report_to_s3( + log_path, + s3_path_prefix + "/" + os.path.basename(log_path))) + + return additional_urls + + +def process_result(result_folder): + test_results = [] + additional_files = [] + # Just upload all files from result_folder. + # If task provides processed results, then it's responsible for content of result_folder. 
+ if os.path.exists(result_folder): + test_files = [f for f in os.listdir(result_folder) if os.path.isfile(os.path.join(result_folder, f))] + additional_files = [os.path.join(result_folder, f) for f in test_files] + + status_path = os.path.join(result_folder, "check_status.tsv") + logging.info("Found test_results.tsv") + status = list(csv.reader(open(status_path, 'r'), delimiter='\t')) + if len(status) != 1 or len(status[0]) != 2: + return "error", "Invalid check_status.tsv", test_results, additional_files + state, description = status[0][0], status[0][1] + + try: + results_path = os.path.join(result_folder, "test_results.tsv") + test_results = list(csv.reader(open(results_path, 'r'), delimiter='\t')) + if len(test_results) == 0: + raise Exception("Empty results") + + return state, description, test_results, additional_files + except Exception: + if state == "success": + state, description = "error", "Failed to read test_results.tsv" + return state, description, test_results, additional_files + +def upload_results(s3_client, pr_number, commit_sha, test_results, additional_files): + s3_path_prefix = f"{pr_number}/{commit_sha}/style_check" + additional_urls = process_logs(s3_client, additional_files, s3_path_prefix) + + branch_url = "https://github.com/ClickHouse/ClickHouse/commits/master" + branch_name = "master" + if pr_number != 0: + branch_name = "PR #{}".format(pr_number) + branch_url = "https://github.com/ClickHouse/ClickHouse/pull/" + str(pr_number) + commit_url = f"https://github.com/ClickHouse/ClickHouse/commit/{commit_sha}" + + task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" + + raw_log_url = additional_urls[0] + additional_urls.pop(0) + + html_report = create_test_html_report(NAME, test_results, raw_log_url, task_url, branch_url, branch_name, commit_url, additional_urls) + with open('report.html', 'w') as f: + f.write(html_report) + + url = s3_client.upload_test_report_to_s3('report.html', s3_path_prefix + ".html") + logging.info("Search result in url %s", url) + return url + + +def get_commit(gh, commit_sha): + repo = gh.get_repo(os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")) + commit = repo.get_commit(commit_sha) + return commit + +def update_check_with_curl(check_id): + cmd_template = ("curl -v --request PATCH --url https://api.github.com/repos/ClickHouse/ClickHouse/check-runs/{} " + "--header 'authorization: Bearer {}' " + "--header 'Accept: application/vnd.github.v3+json' " + "--header 'content-type: application/json' " + "-d '{{\"name\" : \"hello-world-name\"}}'") + cmd = cmd_template.format(check_id, os.getenv("GITHUB_TOKEN")) + subprocess.check_call(cmd, shell=True) + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + repo_path = os.path.join(os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../"))) + temp_path = os.path.join(os.getenv("RUNNER_TEMP", os.path.abspath("./temp")), 'style_check') + + with open(os.getenv('GITHUB_EVENT_PATH'), 'r') as event_file: + event = json.load(event_file) + pr_info = PRInfo(event) + + if not os.path.exists(temp_path): + os.makedirs(temp_path) + + gh = Github(get_best_robot_token()) + + images_path = os.path.join(temp_path, 'changed_images.json') + docker_image = 'clickhouse/style-test' + if os.path.exists(images_path): + logging.info("Images file exists") + with open(images_path, 'r') as images_fd: + images = json.load(images_fd) + logging.info("Got images %s", images) + if 'clickhouse/style-test' in images: + docker_image += ':' + 
images['clickhouse/style-test'] + + logging.info("Got docker image %s", docker_image) + for i in range(10): + try: + subprocess.check_output(f"docker pull {docker_image}", shell=True) + break + except Exception as ex: + time.sleep(i * 3) + logging.info("Got execption pulling docker %s", ex) + else: + raise Exception(f"Cannot pull dockerhub for image {docker_image}") + + s3_helper = S3Helper('https://s3.amazonaws.com') + + subprocess.check_output(f"docker run -u $(id -u ${{USER}}):$(id -g ${{USER}}) --cap-add=SYS_PTRACE --volume={repo_path}:/ClickHouse --volume={temp_path}:/test_output {docker_image}", shell=True) + state, description, test_results, additional_files = process_result(temp_path) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, additional_files) + print("::notice ::Report url: {}".format(report_url)) + commit = get_commit(gh, pr_info.sha) + commit.create_status(context=NAME, description=description, state=state, target_url=report_url) diff --git a/tests/ci/termination_lambda/Dockerfile b/tests/ci/termination_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/termination_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/termination_lambda/app.py b/tests/ci/termination_lambda/app.py new file mode 100644 index 00000000000..0b39cf73f25 --- /dev/null +++ b/tests/ci/termination_lambda/app.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time +from collections import namedtuple + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + + +RunnerDescription = namedtuple('RunnerDescription', ['id', 'name', 'tags', 'offline', 'busy']) + +def list_runners(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.get("https://api.github.com/orgs/ClickHouse/actions/runners", headers=headers) + response.raise_for_status() + data = response.json() + print("Total runners", data['total_count']) + runners = data['runners'] + result = 
[] + for runner in runners: + tags = [tag['name'] for tag in runner['labels']] + desc = RunnerDescription(id=runner['id'], name=runner['name'], tags=tags, + offline=runner['status']=='offline', busy=runner['busy']) + result.append(desc) + return result + +def push_metrics_to_cloudwatch(listed_runners, namespace): + import boto3 + client = boto3.client('cloudwatch') + metrics_data = [] + busy_runners = sum(1 for runner in listed_runners if runner.busy) + metrics_data.append({ + 'MetricName': 'BusyRunners', + 'Value': busy_runners, + 'Unit': 'Count', + }) + total_active_runners = sum(1 for runner in listed_runners if not runner.offline) + metrics_data.append({ + 'MetricName': 'ActiveRunners', + 'Value': total_active_runners, + 'Unit': 'Count', + }) + total_runners = len(listed_runners) + metrics_data.append({ + 'MetricName': 'TotalRunners', + 'Value': total_runners, + 'Unit': 'Count', + }) + if total_active_runners == 0: + busy_ratio = 100 + else: + busy_ratio = busy_runners / total_active_runners * 100 + + metrics_data.append({ + 'MetricName': 'BusyRunnersRatio', + 'Value': busy_ratio, + 'Unit': 'Percent', + }) + + client.put_metric_data(Namespace='RunnersMetrics', MetricData=metrics_data) + + +def how_many_instances_to_kill(event_data): + data_array = event_data['CapacityToTerminate'] + to_kill_by_zone = {} + for av_zone in data_array: + zone_name = av_zone['AvailabilityZone'] + to_kill = av_zone['Capacity'] + if zone_name not in to_kill_by_zone: + to_kill_by_zone[zone_name] = 0 + + to_kill_by_zone[zone_name] += to_kill + return to_kill_by_zone + +def get_candidates_to_be_killed(event_data): + data_array = event_data['Instances'] + instances_by_zone = {} + for instance in data_array: + zone_name = instance['AvailabilityZone'] + instance_id = instance['InstanceId'] + if zone_name not in instances_by_zone: + instances_by_zone[zone_name] = [] + instances_by_zone[zone_name].append(instance_id) + + return instances_by_zone + +def delete_runner(access_token, runner): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + + response = requests.delete(f"https://api.github.com/orgs/ClickHouse/actions/runners/{runner.id}", headers=headers) + response.raise_for_status() + print(f"Response code deleting {runner.name} is {response.status_code}") + return response.status_code == 204 + + +def main(github_secret_key, github_app_id, event): + print("Got event", json.dumps(event, sort_keys=True, indent=4)) + to_kill_by_zone = how_many_instances_to_kill(event) + instances_by_zone = get_candidates_to_be_killed(event) + + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + + runners = list_runners(access_token) + + to_delete_runners = [] + instances_to_kill = [] + for zone in to_kill_by_zone: + num_to_kill = to_kill_by_zone[zone] + candidates = instances_by_zone[zone] + if num_to_kill > len(candidates): + raise Exception(f"Required to kill {num_to_kill}, but have only {len(candidates)} candidates in AV {zone}") + + delete_for_av = [] + for candidate in candidates: + if candidate not in set([runner.name for runner in runners]): + print(f"Candidate {candidate} was not in runners list, simply delete it") + instances_to_kill.append(candidate) + + for candidate in candidates: + if len(delete_for_av) + 
len(instances_to_kill) == num_to_kill: + break + if candidate in instances_to_kill: + continue + + for runner in runners: + if runner.name == candidate: + if not runner.busy: + print(f"Runner {runner.name} is not busy and can be deleted from AV {zone}") + delete_for_av.append(runner) + else: + print(f"Runner {runner.name} is busy, not going to delete it") + break + + if len(delete_for_av) < num_to_kill: + print(f"Checked all candidates for av {zone}, get to delete {len(delete_for_av)}, but still cannot get required {num_to_kill}") + to_delete_runners += delete_for_av + + print("Got instances to kill: ", ', '.join(instances_to_kill)) + print("Going to delete runners:", ', '.join([runner.name for runner in to_delete_runners])) + for runner in to_delete_runners: + if delete_runner(access_token, runner): + print(f"Runner {runner.name} successfuly deleted from github") + instances_to_kill.append(runner.name) + else: + print(f"Cannot delete {runner.name} from github") + + ## push metrics + #runners = list_runners(access_token) + #push_metrics_to_cloudwatch(runners, 'RunnersMetrics') + + response = { + "InstanceIDs": instances_to_kill + } + print(response) + return response + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + return main(private_key, app_id, event) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get list of runners and their states') + parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + sample_event = { + "AutoScalingGroupARN": "arn:aws:autoscaling:us-east-1::autoScalingGroup:d4738357-2d40-4038-ae7e-b00ae0227003:autoScalingGroupName/my-asg", + "AutoScalingGroupName": "my-asg", + "CapacityToTerminate": [ + { + "AvailabilityZone": "us-east-1b", + "Capacity": 1, + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "Capacity": 2, + "InstanceMarketOption": "OnDemand" + } + ], + "Instances": [ + { + "AvailabilityZone": "us-east-1b", + "InstanceId": "i-08d0b3c1a137e02a5", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-27-227.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + }, + { + "AvailabilityZone": "us-east-1c", + "InstanceId": "ip-172-31-45-253.eu-west-1.compute.internal", + "InstanceType": "t2.nano", + "InstanceMarketOption": "OnDemand" + } + ], + "Cause": "SCALE_IN" + } + + main(private_key, args.app_id, sample_event) diff --git a/tests/ci/termination_lambda/requirements.txt b/tests/ci/termination_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ 
b/tests/ci/termination_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/token_lambda/Dockerfile b/tests/ci/token_lambda/Dockerfile new file mode 100644 index 00000000000..f53be71a893 --- /dev/null +++ b/tests/ci/token_lambda/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.9 + +# Copy function code +COPY app.py ${LAMBDA_TASK_ROOT} + +# Install the function's dependencies using file requirements.txt +# from your project folder. + +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile) +CMD [ "app.handler" ] diff --git a/tests/ci/token_lambda/app.py b/tests/ci/token_lambda/app.py new file mode 100644 index 00000000000..731d6c040de --- /dev/null +++ b/tests/ci/token_lambda/app.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +import requests +import argparse +import jwt +import sys +import json +import time + +def get_installation_id(jwt_token): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.get("https://api.github.com/app/installations", headers=headers) + response.raise_for_status() + data = response.json() + return data[0]['id'] + +def get_access_token(jwt_token, installation_id): + headers = { + "Authorization": f"Bearer {jwt_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post(f"https://api.github.com/app/installations/{installation_id}/access_tokens", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_runner_registration_token(access_token): + headers = { + "Authorization": f"token {access_token}", + "Accept": "application/vnd.github.v3+json", + } + response = requests.post("https://api.github.com/orgs/ClickHouse/actions/runners/registration-token", headers=headers) + response.raise_for_status() + data = response.json() + return data['token'] + +def get_key_and_app_from_aws(): + import boto3 + secret_name = "clickhouse_github_secret_key" + session = boto3.session.Session() + client = session.client( + service_name='secretsmanager', + ) + get_secret_value_response = client.get_secret_value( + SecretId=secret_name + ) + data = json.loads(get_secret_value_response['SecretString']) + return data['clickhouse-app-key'], int(data['clickhouse-app-id']) + + +def main(github_secret_key, github_app_id, push_to_ssm, ssm_parameter_name): + payload = { + "iat": int(time.time()) - 60, + "exp": int(time.time()) + (10 * 60), + "iss": github_app_id, + } + + encoded_jwt = jwt.encode(payload, github_secret_key, algorithm="RS256") + installation_id = get_installation_id(encoded_jwt) + access_token = get_access_token(encoded_jwt, installation_id) + runner_registration_token = get_runner_registration_token(access_token) + + if push_to_ssm: + import boto3 + + print("Trying to put params into ssm manager") + client = boto3.client('ssm') + client.put_parameter( + Name=ssm_parameter_name, + Value=runner_registration_token, + Type='SecureString', + Overwrite=True) + else: + print("Not push token to AWS Parameter Store, just print:", runner_registration_token) + + +def handler(event, context): + private_key, app_id = get_key_and_app_from_aws() + main(private_key, app_id, True, 'github_runner_registration_token') + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Get new token from github to add runners') + 
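main() above pushes the freshly minted runner registration token into AWS Parameter Store, and the worker init script further below reads it back with the aws CLI before running config.sh. A sketch of the same round trip in boto3; the parameter name matches the one used here, while the token value and region are placeholders/assumptions:

    import boto3

    ssm = boto3.client("ssm", region_name="us-east-1")

    # What the lambda does after obtaining the token from the GitHub API.
    ssm.put_parameter(Name="github_runner_registration_token",
                      Value="AAAA-placeholder-token",
                      Type="SecureString",
                      Overwrite=True)

    # What a new runner host does on boot (init.sh shells out to `aws ssm get-parameter`).
    token = ssm.get_parameter(Name="github_runner_registration_token",
                              WithDecryption=True)["Parameter"]["Value"]
    print(token)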
parser.add_argument('-p', '--private-key-path', help='Path to file with private key') + parser.add_argument('-k', '--private-key', help='Private key') + parser.add_argument('-a', '--app-id', type=int, help='GitHub application ID', required=True) + parser.add_argument('--push-to-ssm', action='store_true', help='Store received token in parameter store') + parser.add_argument('--ssm-parameter-name', default='github_runner_registration_token', help='AWS Parameter Store parameter name') + + args = parser.parse_args() + + if not args.private_key_path and not args.private_key: + print("Either --private-key-path or --private-key must be specified", file=sys.stderr) + + if args.private_key_path and args.private_key: + print("Options --private-key-path and --private-key are mutually exclusive", file=sys.stderr) + + if args.private_key: + private_key = args.private_key + else: + with open(args.private_key_path, 'r') as key_file: + private_key = key_file.read() + + main(private_key, args.app_id, args.push_to_ssm, args.ssm_parameter_name) diff --git a/tests/ci/token_lambda/requirements.txt b/tests/ci/token_lambda/requirements.txt new file mode 100644 index 00000000000..c0dcf4a4dde --- /dev/null +++ b/tests/ci/token_lambda/requirements.txt @@ -0,0 +1,3 @@ +requests +PyJWT +cryptography diff --git a/tests/ci/worker/init.sh b/tests/ci/worker/init.sh new file mode 100644 index 00000000000..2f6638f14b5 --- /dev/null +++ b/tests/ci/worker/init.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash +set -euo pipefail + +echo "Running init script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_HOME=/home/ubuntu/actions-runner + +echo "Receiving token" +export RUNNER_TOKEN=`/usr/local/bin/aws ssm get-parameter --name github_runner_registration_token --with-decryption --output text --query Parameter.Value` +export RUNNER_URL="https://github.com/ClickHouse" +# Funny fact, but the metadata service has a fixed IP +export INSTANCE_ID=`curl -s http://169.254.169.254/latest/meta-data/instance-id` + +cd $RUNNER_HOME + +echo "Going to configure runner" +sudo -u ubuntu ./config.sh --url $RUNNER_URL --token $RUNNER_TOKEN --name $INSTANCE_ID --runnergroup Default --labels 'self-hosted,Linux,X64' --work _work + +echo "Run" +sudo -u ubuntu ./run.sh diff --git a/tests/ci/worker/ubuntu_ami.sh b/tests/ci/worker/ubuntu_ami.sh new file mode 100644 index 00000000000..2609c1a69f3 --- /dev/null +++ b/tests/ci/worker/ubuntu_ami.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +set -euo pipefail + +echo "Running prepare script" +export DEBIAN_FRONTEND=noninteractive +export RUNNER_VERSION=2.283.1 +export RUNNER_HOME=/home/ubuntu/actions-runner + +apt-get update + +apt-get install --yes --no-install-recommends \ + apt-transport-https \ + ca-certificates \ + curl \ + gnupg \ + lsb-release \ + python3-pip \ + unzip + +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg + +echo "deb [arch=amd64 signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | tee /etc/apt/sources.list.d/docker.list > /dev/null + +apt-get update + +apt-get install --yes --no-install-recommends docker-ce docker-ce-cli containerd.io + +usermod -aG docker ubuntu + +pip install boto3 pygithub requests urllib3 unidiff + +mkdir -p $RUNNER_HOME && cd $RUNNER_HOME + +curl -O -L https://github.com/actions/runner/releases/download/v$RUNNER_VERSION/actions-runner-linux-x64-$RUNNER_VERSION.tar.gz + +tar xzf ./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +rm -f
./actions-runner-linux-x64-$RUNNER_VERSION.tar.gz +./bin/installdependencies.sh + +chown -R ubuntu:ubuntu $RUNNER_HOME + +cd /home/ubuntu +curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" +unzip awscliv2.zip +./aws/install + +rm -rf /home/ubuntu/awscliv2.zip /home/ubuntu/aws diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b2a9358371a..5f6960e57c4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -109,7 +109,10 @@ def clickhouse_execute_json(base_args, query, timeout=30, settings=None): data = clickhouse_execute_http(base_args, query, timeout, settings, 'JSONEachRow') if not data: return None - return json.loads(data) + rows = [] + for row in data.strip().splitlines(): + rows.append(json.loads(row)) + return rows class Terminated(KeyboardInterrupt): @@ -475,19 +478,19 @@ class TestCase: if os.path.isfile(self.stdout_file): description += ", result:\n\n" - description += '\n'.join(open(self.stdout_file).read().split('\n')[:100]) + description += '\n'.join(open(self.stdout_file).read().splitlines()[:100]) description += '\n' description += "\nstdout:\n{}\n".format(stdout) return TestResult(self.name, TestStatus.FAIL, reason, total_time, description) if stderr: - description += "\n{}\n".format('\n'.join(stderr.split('\n')[:100])) + description += "\n{}\n".format('\n'.join(stderr.splitlines()[:100])) description += "\nstdout:\n{}\n".format(stdout) return TestResult(self.name, TestStatus.FAIL, FailureReason.STDERR, total_time, description) if 'Exception' in stdout: - description += "\n{}\n".format('\n'.join(stdout.split('\n')[:100])) + description += "\n{}\n".format('\n'.join(stdout.splitlines()[:100])) return TestResult(self.name, TestStatus.FAIL, FailureReason.EXCEPTION, total_time, description) if '@@SKIP@@' in stdout: @@ -1306,7 +1309,7 @@ if __name__ == '__main__': parser.add_argument('-j', '--jobs', default=1, nargs='?', type=int, help='Run all tests in parallel') parser.add_argument('--test-runs', default=1, nargs='?', type=int, help='Run each test many times (useful for e.g. 
flaky check)') parser.add_argument('-U', '--unified', default=3, type=int, help='output NUM lines of unified context') - parser.add_argument('-r', '--server-check-retries', default=30, type=int, help='Num of tries to execute SELECT 1 before tests started') + parser.add_argument('-r', '--server-check-retries', default=90, type=int, help='Num of tries to execute SELECT 1 before tests started') parser.add_argument('--db-engine', help='Database engine name') parser.add_argument('--replicated-database', action='store_true', default=False, help='Run tests with Replicated database engine') parser.add_argument('--fast-tests-only', action='store_true', default=False, help='Run only fast tests (the tests without the "no-fasttest" tag)') @@ -1392,7 +1395,6 @@ if __name__ == '__main__': http_port = os.getenv("CLICKHOUSE_PORT_HTTP") if http_port is not None: args.http_port = int(http_port) - args.client += f" --port={http_port}" else: args.http_port = 8123 diff --git a/tests/config/install.sh b/tests/config/install.sh index 04b655d8769..a451c9f3ed1 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -49,6 +49,7 @@ ln -sf $SRC_PATH/users.d/opentelemetry.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/remote_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/session_log_test.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/memory_profiler.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/no_fsync_metadata.xml $DEST_SERVER_PATH/users.d/ # FIXME DataPartsExchange may hang for http_send_timeout seconds # when nobody is going to read from the other side of socket (due to "Fetching of part was cancelled"), diff --git a/tests/config/users.d/no_fsync_metadata.xml b/tests/config/users.d/no_fsync_metadata.xml new file mode 100644 index 00000000000..6b9d69927f8 --- /dev/null +++ b/tests/config/users.d/no_fsync_metadata.xml @@ -0,0 +1,11 @@ + + + + + false + + + diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 4a9afc348cf..bf25f1fa484 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -985,10 +985,10 @@ "RANGE" "rank" "rankCorr" -"readWktMultiPolygon" -"readWktPoint" -"readWktPolygon" -"readWktRing" +"readWKTMultiPolygon" +"readWKTPoint" +"readWKTPolygon" +"readWKTRing" "REAL" "REFRESH" "regexpQuoteMeta" @@ -1177,6 +1177,7 @@ "sumWithOverflow" "SUSPEND" "svg" +"SVG" "SYNC" "synonyms" "SYNTAX" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index fb35375f284..722e931dc09 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -52,6 +52,7 @@ "h3GetResolution" "h3EdgeLengthM" "svg" +"SVG" "equals" "geohashesInBox" "polygonsIntersectionCartesian" @@ -114,7 +115,7 @@ "replaceOne" "emptyArrayInt32" "extract" -"readWktPolygon" +"readWKTPolygon" "notILike" "geohashDecode" "toModifiedJulianDay" @@ -164,7 +165,7 @@ "lessOrEquals" "subtractQuarters" "ngramSearch" -"readWktRing" +"readWKTRing" "trimRight" "endsWith" "ngramDistanceCaseInsensitive" @@ -713,13 +714,13 @@ "s2RectContains" "toDate" "regexpQuoteMeta" -"readWktMultiPolygon" +"readWKTMultiPolygon" "emptyArrayString" "bitmapOr" "cutWWW" "emptyArrayInt8" "less" -"readWktPoint" +"readWKTPoint" "reinterpretAsDateTime" "notEquals" "geoToS2" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 51b7bfcbcb8..3854cadaba5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -2320,6 +2320,9 @@ class ClickHouseInstance: def 
replace_config(self, path_to_config, replacement): self.exec_in_container(["bash", "-c", "echo '{}' > {}".format(replacement, path_to_config)]) + def replace_in_config(self, path_to_config, replace, replacement): + self.exec_in_container(["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"]) + def create_dir(self, destroy_dir=True): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/test_keeper_three_nodes_start/__init__.py b/tests/integration/test_keeper_three_nodes_start/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml new file mode 100644 index 00000000000..bc62d817074 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper1.xml @@ -0,0 +1,32 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + non_existing_node + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml new file mode 100644 index 00000000000..a6c476fb449 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/configs/enable_keeper2.xml @@ -0,0 +1,32 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + non_existing_node + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_start/test.py b/tests/integration/test_keeper_three_nodes_start/test.py new file mode 100644 index 00000000000..7828f21d0d7 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_start/test.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from multiprocessing.dummy import Pool +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml'], stay_alive=True) + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + +def test_smoke(): + try: + cluster.start() + + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_alive", b"aaaa") + + finally: + cluster.shutdown() diff --git a/tests/integration/test_keeper_three_nodes_two_alive/__init__.py b/tests/integration/test_keeper_three_nodes_two_alive/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml new 
file mode 100644 index 00000000000..510424715c4 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper1.xml @@ -0,0 +1,32 @@ + + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml new file mode 100644 index 00000000000..264601d8c98 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper2.xml @@ -0,0 +1,32 @@ + + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml new file mode 100644 index 00000000000..7f9775939bb --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/enable_keeper3.xml @@ -0,0 +1,32 @@ + + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + + 5000 + 10000 + trace + + + + + 1 + node1 + 44444 + + + 2 + node2 + 44444 + + + 3 + node3 + 44444 + + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml b/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml new file mode 100644 index 00000000000..384e984f210 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/configs/keeper_conf.xml @@ -0,0 +1,16 @@ + + + + node1 + 9181 + + + node2 + 9181 + + + node3 + 9181 + + + diff --git a/tests/integration/test_keeper_three_nodes_two_alive/test.py b/tests/integration/test_keeper_three_nodes_two_alive/test.py new file mode 100644 index 00000000000..eb63d28b3e2 --- /dev/null +++ b/tests/integration/test_keeper_three_nodes_two_alive/test.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +import pytest +from helpers.cluster import ClickHouseCluster +import random +import string +import os +import time +from multiprocessing.dummy import Pool +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +from kazoo.client import KazooClient, KazooState + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/enable_keeper1.xml', 'configs/keeper_conf.xml'], stay_alive=True) +node2 = cluster.add_instance('node2', main_configs=['configs/enable_keeper2.xml', 'configs/keeper_conf.xml'], stay_alive=True) +node3 = cluster.add_instance('node3', main_configs=['configs/enable_keeper3.xml', 'configs/keeper_conf.xml'], stay_alive=True) + + +def get_fake_zk(nodename, timeout=30.0): + _fake_zk_instance = KazooClient(hosts=cluster.get_instance_ip(nodename) + ":9181", timeout=timeout) + _fake_zk_instance.start() + return _fake_zk_instance + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def start(node): + node.start_clickhouse() + + +def test_start_offline(started_cluster): + p = Pool(3) + try: + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_alive", b"aaaa") + + node1.stop_clickhouse() + node2.stop_clickhouse() + node3.stop_clickhouse() + + time.sleep(3) + p.map(start, 
[node2, node3]) + + assert node2.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + assert node3.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + + node2_zk = get_fake_zk("node2") + node2_zk.create("/test_dead", b"data") + finally: + p.map(start, [node1, node2, node3]) + + +def test_start_non_existing(started_cluster): + p = Pool(3) + try: + node1.stop_clickhouse() + node2.stop_clickhouse() + node3.stop_clickhouse() + + node1.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper1.xml', 'node3', 'non_existing_node') + node2.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper2.xml', 'node3', 'non_existing_node') + + time.sleep(3) + p.map(start, [node2, node1]) + + assert node1.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + assert node2.contains_in_log("Cannot connect to ZooKeeper (or Keeper) before internal Keeper start") + + node2_zk = get_fake_zk("node2") + node2_zk.create("/test_non_exising", b"data") + finally: + node1.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper1.xml', 'non_existing_node', 'node3') + node2.replace_in_config('/etc/clickhouse-server/config.d/enable_keeper2.xml', 'non_existing_node', 'node3') + p.map(start, [node1, node2, node3]) + +def test_restart_third_node(started_cluster): + node1_zk = get_fake_zk("node1") + node1_zk.create("/test_restart", b"aaaa") + + node3.restart_clickhouse() + + assert node3.contains_in_log("Connected to ZooKeeper (or Keeper) before internal Keeper start") diff --git a/tests/integration/test_log_family_hdfs/test.py b/tests/integration/test_log_family_hdfs/test.py index a38b067358e..7bb9cdfeaf5 100644 --- a/tests/integration/test_log_family_hdfs/test.py +++ b/tests/integration/test_log_family_hdfs/test.py @@ -31,10 +31,20 @@ def assert_objects_count(started_cluster, objects_count, path='data/'): hdfs_objects = fs.listdir('/clickhouse') assert objects_count == len(hdfs_objects) - +# TinyLog: files: id.bin, sizes.json +# INSERT overwrites 1 file (`sizes.json`) and appends 1 file (`id.bin`), so +# files_overhead=1, files_overhead_per_insert=1 +# +# Log: files: id.bin, __marks.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`id.bin`, `__marks.mrk`), so +# files_overhead=1, files_overhead_per_insert=2 +# +# StripeLog: files: data.bin, index.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`index.mrk`, `data.bin`), so +# files_overhead=1, files_overhead_per_insert=2 @pytest.mark.parametrize( "log_engine,files_overhead,files_overhead_per_insert", - [("TinyLog", 1, 1), ("Log", 2, 1), ("StripeLog", 1, 2)]) + [("TinyLog", 1, 1), ("Log", 1, 2), ("StripeLog", 1, 2)]) def test_log_family_hdfs(started_cluster, log_engine, files_overhead, files_overhead_per_insert): node = started_cluster.instances["node"] diff --git a/tests/integration/test_log_family_s3/test.py b/tests/integration/test_log_family_s3/test.py index 71d47a8a2e8..8531edd635f 100644 --- a/tests/integration/test_log_family_s3/test.py +++ b/tests/integration/test_log_family_s3/test.py @@ -30,10 +30,20 @@ def assert_objects_count(cluster, objects_count, path='data/'): logging.info("Existing S3 object: %s", str(object_meta)) assert objects_count == len(s3_objects) - +# TinyLog: files: id.bin, sizes.json +# INSERT overwrites 1 file (`sizes.json`) and appends 1 file (`id.bin`), so +# files_overhead=1, files_overhead_per_insert=1 +# +# Log: files: id.bin, __marks.mrk, 
sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`id.bin`, `__marks.mrk`), so +# files_overhead=1, files_overhead_per_insert=2 +# +# StripeLog: files: data.bin, index.mrk, sizes.json +# INSERT overwrites 1 file (`sizes.json`), and appends 2 files (`index.mrk`, `data.bin`), so +# files_overhead=1, files_overhead_per_insert=2 @pytest.mark.parametrize( "log_engine,files_overhead,files_overhead_per_insert", - [("TinyLog", 1, 1), ("Log", 2, 1), ("StripeLog", 1, 2)]) + [("TinyLog", 1, 1), ("Log", 1, 2), ("StripeLog", 1, 2)]) def test_log_family_s3(cluster, log_engine, files_overhead, files_overhead_per_insert): node = cluster.instances["node"] diff --git a/tests/integration/test_log_levels_update/__init__.py b/tests/integration/test_log_levels_update/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_log_levels_update/configs/log.xml b/tests/integration/test_log_levels_update/configs/log.xml new file mode 100644 index 00000000000..668a15f6afd --- /dev/null +++ b/tests/integration/test_log_levels_update/configs/log.xml @@ -0,0 +1,6 @@ + + + trace + /var/log/clickhouse-server/clickhouse-server.log + + \ No newline at end of file diff --git a/tests/integration/test_log_levels_update/test.py b/tests/integration/test_log_levels_update/test.py new file mode 100644 index 00000000000..dca660a2982 --- /dev/null +++ b/tests/integration/test_log_levels_update/test.py @@ -0,0 +1,50 @@ +import pytest +import re + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, name="log_levels_update") +node = cluster.add_instance('node', with_zookeeper=False) + +config = ''' + + information + /var/log/clickhouse-server/clickhouse-server.log + +''' + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def get_log(node): + return node.exec_in_container(["bash", "-c", "cat /var/log/clickhouse-server/clickhouse-server.log"]) + +def test_log_levels_update(start_cluster): + # Make sure that there are enough log messages for the test + for i in range(5): + node.query("SELECT 1") + + log = get_log(node) + assert re.search("(<Trace>|<Debug>)", log) + + node.replace_config("/etc/clickhouse-server/config.d/log.xml", config) + node.query("SYSTEM RELOAD CONFIG;") + node.exec_in_container(["bash", "-c", "> /var/log/clickhouse-server/clickhouse-server.log"]) + + for i in range(5): + node.query("SELECT 1") + + log = get_log(node) + assert len(log) > 0 + assert not re.search("(<Trace>|<Debug>)", log) + + + diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/__init__.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_a.xml b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_a.xml new file mode 100644 index 00000000000..9d866c91f54 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_a.xml @@ -0,0 +1,9 @@ + + + + + aaaaaaaaaaaaaaaa + + + + diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_b.xml b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_b.xml new file mode 100644 index 00000000000..c34283160a5 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/key_b.xml @@ -0,0 +1,9 @@ + + + + + bbbbbbbbbbbbbbbb + + + + diff --git
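The corrected Log parameters in the two hunks above follow from the file accounting spelled out in the comments: each INSERT rewrites `sizes.json` once (the constant files_overhead), and appends one object per data/mark file (files_overhead_per_insert). A rough sanity check of that arithmetic, assuming this accounting and not quoting the literal test code, is:

def expected_objects(files_overhead, files_overhead_per_insert, inserts):
    # constant overhead plus the per-insert appends described in the comments above
    return files_overhead + inserts * files_overhead_per_insert

assert expected_objects(1, 1, 3) == 4  # TinyLog: id.bin appended 3 times + sizes.json
assert expected_objects(1, 2, 3) == 7  # Log: id.bin and __marks.mrk appended 3 times + sizes.json
assert expected_objects(1, 2, 3) == 7  # StripeLog: data.bin and index.mrk appended 3 times + sizes.json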
a/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/remote_servers.xml b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/remote_servers.xml new file mode 100644 index 00000000000..84d16206080 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/remote_servers.xml @@ -0,0 +1,16 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/storage.xml b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/storage.xml new file mode 100644 index 00000000000..312a009ed9a --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/configs/storage.xml @@ -0,0 +1,25 @@ + + + + + local + /disk/ + + + encrypted + disk_local + encrypted/ + 0000000000000000 + + + + + +
+ disk_encrypted +
+
+
+
+
+
diff --git a/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py new file mode 100644 index 00000000000..bc5a419aaf2 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encrypted_disk/test.py @@ -0,0 +1,87 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, TSV +import os + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance("node1", + main_configs=["configs/remote_servers.xml", "configs/storage.xml"], + tmpfs=["/disk:size=100M"], + macros={'replica': 'node1'}, + with_zookeeper=True) + +node2 = cluster.add_instance("node2", + main_configs=["configs/remote_servers.xml", "configs/storage.xml"], + tmpfs=["/disk:size=100M"], + macros={'replica': 'node2'}, + with_zookeeper=True) + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield + finally: + cluster.shutdown() + + +def copy_keys(instance, keys_file_name): + instance.copy_file_to_container(os.path.join(SCRIPT_DIR, f"configs/{keys_file_name}.xml"), "/etc/clickhouse-server/config.d/z_keys.xml") + instance.query("SYSTEM RELOAD CONFIG") + +def create_table(): + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") + node1.query( + """ + CREATE TABLE tbl ON CLUSTER 'cluster' ( + id Int64, + str String + ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}') + ORDER BY id + SETTINGS storage_policy='encrypted_policy' + """ + ) + +def insert_data(): + node1.query("INSERT INTO tbl VALUES (1, 'str1')") + node2.query("INSERT INTO tbl VALUES (1, 'str1')") # Test deduplication + node2.query("INSERT INTO tbl VALUES (2, 'str2')") + +def optimize_table(): + node1.query("OPTIMIZE TABLE tbl ON CLUSTER 'cluster' FINAL") + +def check_table(): + expected=[[1, 'str1'], [2, 'str2']] + assert node1.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) + assert node2.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) + assert node1.query("CHECK TABLE tbl") == "1\n" + assert node2.query("CHECK TABLE tbl") == "1\n" + + +# Actual tests: + +def test_same_keys(): + copy_keys(node1, 'key_a') + copy_keys(node2, 'key_a') + create_table() + + insert_data() + check_table() + + optimize_table() + check_table() + + +def test_different_keys(): + copy_keys(node1, 'key_a') + copy_keys(node2, 'key_b') + create_table() + + insert_data() + check_table() + + optimize_table() + check_table() diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/__init__.py b/tests/integration/test_replicated_merge_tree_encryption_codec/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/encryption_codec.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/encryption_codec.xml new file mode 100644 index 00000000000..eb4f8abaa77 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/encryption_codec.xml @@ -0,0 +1,7 @@ + + + + 0000000000000000 + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a.xml new file mode 100644 index 00000000000..a31978e7015 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a.xml @@ -0,0 +1,7 @@ + + + + 
aaaaaaaaaaaaaaaa + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_a.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_a.xml new file mode 100644 index 00000000000..01ca9123ccb --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_a.xml @@ -0,0 +1,10 @@ + + + + + aaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbb + 0 + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_b.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_b.xml new file mode 100644 index 00000000000..98cf6ced0c7 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_b_current_b.xml @@ -0,0 +1,10 @@ + + + + + aaaaaaaaaaaaaaaa + bbbbbbbbbbbbbbbb + 1 + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_x.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_x.xml new file mode 100644 index 00000000000..40c5adab19b --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_x.xml @@ -0,0 +1,8 @@ + + + + aaaaaaaaaaaaaaaa + xxxxxxxxxxxx + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_y.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_y.xml new file mode 100644 index 00000000000..eadfb6e6733 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_a_and_nonce_y.xml @@ -0,0 +1,8 @@ + + + + aaaaaaaaaaaaaaaa + yyyyyyyyyyyy + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_b.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_b.xml new file mode 100644 index 00000000000..e336324f648 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/key_b.xml @@ -0,0 +1,7 @@ + + + + bbbbbbbbbbbbbbbb + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/configs/remote_servers.xml b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/remote_servers.xml new file mode 100644 index 00000000000..84d16206080 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/configs/remote_servers.xml @@ -0,0 +1,16 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_replicated_merge_tree_encryption_codec/test.py b/tests/integration/test_replicated_merge_tree_encryption_codec/test.py new file mode 100644 index 00000000000..3aec2259703 --- /dev/null +++ b/tests/integration/test_replicated_merge_tree_encryption_codec/test.py @@ -0,0 +1,110 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, TSV +import os + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance("node1", + main_configs=["configs/remote_servers.xml", "configs/encryption_codec.xml"], + macros={'replica': 'node1'}, + with_zookeeper=True) + +node2 = cluster.add_instance("node2", + main_configs=["configs/remote_servers.xml", "configs/encryption_codec.xml"], + macros={'replica': 'node2'}, + with_zookeeper=True) + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + 
cluster.start() + yield + finally: + cluster.shutdown() + + +def copy_keys(instance, keys_file_name): + instance.copy_file_to_container(os.path.join(SCRIPT_DIR, f"configs/{keys_file_name}.xml"), "/etc/clickhouse-server/config.d/z_keys.xml") + instance.query("SYSTEM RELOAD CONFIG") + +def create_table(): + node1.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") + node1.query( + """ + CREATE TABLE tbl ON CLUSTER 'cluster' ( + id Int64, + str String Codec(AES_128_GCM_SIV) + ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}') + ORDER BY id + """ + ) + +def insert_data(): + node1.query("INSERT INTO tbl VALUES (1, 'str1')") + node2.query("INSERT INTO tbl VALUES (1, 'str1')") # Test deduplication + node2.query("INSERT INTO tbl VALUES (2, 'str2')") + +def optimize_table(): + node1.query("OPTIMIZE TABLE tbl ON CLUSTER 'cluster' FINAL") + +def check_table(): + expected=[[1, 'str1'], [2, 'str2']] + assert node1.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) + assert node2.query("SELECT * FROM tbl ORDER BY id") == TSV(expected) + assert node1.query("CHECK TABLE tbl") == "1\n" + assert node2.query("CHECK TABLE tbl") == "1\n" + + +# Actual tests: + +def test_same_keys(): + copy_keys(node1, 'key_a') + copy_keys(node2, 'key_a') + create_table() + + insert_data() + check_table() + + optimize_table() + check_table() + + +def test_different_keys(): + copy_keys(node1, 'key_a') + copy_keys(node2, 'key_b') + create_table() + + insert_data() + assert "BAD_DECRYPT" in node1.query_and_get_error("SELECT * FROM tbl") + assert "BAD_DECRYPT" in node2.query_and_get_error("SELECT * FROM tbl") + + # Hang? + #optimize_table() + #check_table() + + +def test_different_current_key_ids(): + copy_keys(node1, 'key_a_and_b_current_a') + copy_keys(node2, 'key_a_and_b_current_b') + create_table() + + insert_data() + check_table() + + optimize_table() + check_table() + + +def test_different_nonces(): + copy_keys(node1, 'key_a_and_nonce_x') + copy_keys(node2, 'key_a_and_nonce_y') + create_table() + + insert_data() + check_table() + + optimize_table() + check_table() diff --git a/tests/performance/hashed_array_dictionary.xml b/tests/performance/hashed_array_dictionary.xml new file mode 100644 index 00000000000..a26e654248f --- /dev/null +++ b/tests/performance/hashed_array_dictionary.xml @@ -0,0 +1,126 @@ + + + CREATE TABLE simple_key_hashed_array_dictionary_source_table + ( + id UInt64, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) ENGINE = Memory; + + + + CREATE TABLE complex_key_hashed_array_dictionary_source_table + ( + id UInt64, + id_key String, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) ENGINE = Memory; + + + + CREATE DICTIONARY simple_key_hashed_array_dictionary + ( + id UInt64, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) + PRIMARY KEY id + SOURCE(CLICKHOUSE(DB 'default' TABLE 'simple_key_hashed_array_dictionary_source_table')) + LAYOUT(HASHED_ARRAY()) + LIFETIME(MIN 0 MAX 1000); + + + + CREATE DICTIONARY complex_key_hashed_array_dictionary + ( + id UInt64, + id_key String, + value_int UInt64, + value_string String, + value_decimal Decimal64(8), + value_string_nullable Nullable(String) + ) + PRIMARY KEY id, id_key + SOURCE(CLICKHOUSE(DB 'default' TABLE 'complex_key_hashed_array_dictionary_source_table')) + LAYOUT(COMPLEX_KEY_HASHED_ARRAY()) + LIFETIME(MIN 0 MAX 1000); + 
+ + + INSERT INTO simple_key_hashed_array_dictionary_source_table + SELECT number, number, toString(number), toDecimal64(number, 8), toString(number) + FROM system.numbers + LIMIT 5000000; + + + + INSERT INTO complex_key_hashed_array_dictionary_source_table + SELECT number, toString(number), number, toString(number), toDecimal64(number, 8), toString(number) + FROM system.numbers + LIMIT 5000000; + + + + + column_name + + 'value_int' + 'value_string' + 'value_decimal' + 'value_string_nullable' + + + + + elements_count + + 5000000 + 7500000 + + + + + + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictGet('default.simple_key_hashed_array_dictionary', {column_name}, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + WITH rand64() % toUInt64({elements_count}) as key + SELECT dictHas('default.simple_key_hashed_array_dictionary', key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictGet('default.complex_key_hashed_array_dictionary', {column_name}, key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + WITH (rand64() % toUInt64({elements_count}), toString(rand64() % toUInt64({elements_count}))) as key + SELECT dictHas('default.complex_key_hashed_array_dictionary', key) + FROM system.numbers + LIMIT {elements_count} + FORMAT Null; + + + DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; + + DROP DICTIONARY IF EXISTS simple_key_hashed_array_dictionary; + DROP DICTIONARY IF EXISTS complex_key_hashed_array_dictionary; + + diff --git a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh index 3f5b8d570a6..0ac5a2f748a 100755 --- a/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh +++ b/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=./mergetree_mutations.lib . 
"$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} --multiquery << EOF +${CLICKHOUSE_CLIENT} --allow_nondeterministic_mutations=1 --multiquery << EOF DROP TABLE IF EXISTS mutations_r1; DROP TABLE IF EXISTS for_subquery; diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference index c152a92fb83..a47d9a430dc 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -146,7 +146,7 @@ SELECT t2_00826.b FROM t1_00826 ALL INNER JOIN t2_00826 ON b = t2_00826.a -WHERE (b = t2_00826.a) AND (t2_00826.b IS NULL OR (t2_00826.b > t2_00826.a)) +WHERE (b = t2_00826.a) AND ((t2_00826.b IS NULL) OR (t2_00826.b > t2_00826.a)) --- do not rewrite alias --- SELECT a AS b FROM t1_00826 @@ -178,4 +178,4 @@ SELECT t2_00826.b FROM t1_00826 ALL INNER JOIN t2_00826 ON a = t2_00826.a -WHERE (a = t2_00826.a) AND (t2_00826.b IS NULL OR (t2_00826.b < 2)) +WHERE (a = t2_00826.a) AND ((t2_00826.b IS NULL) OR (t2_00826.b < 2)) diff --git a/tests/queries/0_stateless/01120_join_constants.reference b/tests/queries/0_stateless/01120_join_constants.reference index a16427fbdf7..91838e7a2bb 100644 --- a/tests/queries/0_stateless/01120_join_constants.reference +++ b/tests/queries/0_stateless/01120_join_constants.reference @@ -1,2 +1,4 @@ 1 hello 1 world world 1 2 hello 0 world 1 +1 321 1 123 123 1 +2 321 0 0 123 1 diff --git a/tests/queries/0_stateless/01120_join_constants.sql b/tests/queries/0_stateless/01120_join_constants.sql index 443559c3ea1..d6d6a1be43b 100644 --- a/tests/queries/0_stateless/01120_join_constants.sql +++ b/tests/queries/0_stateless/01120_join_constants.sql @@ -15,3 +15,21 @@ LEFT JOIN arrayJoin([1, 3]) AS k, 'world' ) AS t2 ON t1.k = t2.k; + +SELECT + t1.*, + t2.*, + 123, + isConstant('world') +FROM +( + SELECT + arrayJoin([1, 2]) AS k, + 321 +) AS t1 +LEFT JOIN +( + SELECT + arrayJoin([1, 3]) AS k, + 123 +) AS t2 ON t1.k = t2.k; diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference index 38ca608ee39..3be800888c7 100644 --- a/tests/queries/0_stateless/01161_information_schema.reference +++ b/tests/queries/0_stateless/01161_information_schema.reference @@ -14,14 +14,14 @@ default default v VIEW tmp LOCAL TEMPORARY default default mv SELECT * FROM system.one NONE NO YES NO NO NO default default v SELECT n, f FROM default.t NONE NO NO NO NO NO -default default mv dummy 1 0 UInt8 \N \N 8 2 0 \N \N \N \N \N \N \N \N \N \N -default default t n 1 0 UInt64 \N \N 64 2 0 \N \N \N \N \N \N \N \N \N \N -default default t f 2 0 Float32 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N -default default t s 3 0 String \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N -default default t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N \N \N \N \N \N \N \N -default default t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N \N \N \N \N \N \N \N -default default v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N \N \N \N \N \N \N \N -default default v f 2 0 Float64 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N - tmp d 1 0 Date \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N - tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N - tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N \N \N \N \N \N \N \N +default default mv dummy 1 0 UInt8 \N \N 8 2 0 \N \N \N \N \N \N \N \N \N \N UInt8 +default default t n 1 0 UInt64 \N \N 64 2 0 \N \N \N \N \N \N \N \N \N \N UInt64 +default default t f 2 0 
Float32 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N Float32 +default default t s 3 0 String \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N String +default default t fs 4 0 FixedString(42) 42 42 \N \N \N \N \N \N \N \N \N \N \N \N \N FixedString(42) +default default t d 5 0 Decimal(9, 6) \N \N 9 10 6 \N \N \N \N \N \N \N \N \N \N Decimal(9, 6) +default default v n 1 1 Nullable(Int32) \N \N 32 2 0 \N \N \N \N \N \N \N \N \N \N Nullable(Int32) +default default v f 2 0 Float64 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N Float64 + tmp d 1 0 Date \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N Date + tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N DateTime + tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N \N \N \N \N \N \N \N DateTime64(3) diff --git a/tests/queries/0_stateless/01162_strange_mutations.reference b/tests/queries/0_stateless/01162_strange_mutations.reference new file mode 100644 index 00000000000..55f17cfe464 --- /dev/null +++ b/tests/queries/0_stateless/01162_strange_mutations.reference @@ -0,0 +1,28 @@ +MergeTree +1 +2 +2 +0 +50 6225 0 +50 6225 1900 +ReplicatedMergeTree +1 +2 +2 +0 +50 6225 0 +50 6225 1900 +Memory +1 +2 +2 +0 +50 6225 0 +50 6225 1900 +Join +1 +2 +2 +0 +50 6225 0 +50 6225 0 diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh new file mode 100755 index 00000000000..fecb1b8d8c0 --- /dev/null +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +declare -a engines=("MergeTree order by n" "ReplicatedMergeTree('/test/01162/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX', '1') order by n" "Memory" "Join(ALL, FULL, n)") + +$CLICKHOUSE_CLIENT -q "CREATE OR REPLACE VIEW t1 AS SELECT number * 10 AS id, number * 100 AS value FROM numbers(20)" + +for engine in "${engines[@]}" +do + $CLICKHOUSE_CLIENT -q "drop table if exists t" + $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" + $CLICKHOUSE_CLIENT -q "select engine from system.tables where database=currentDatabase() and name='t'" + $CLICKHOUSE_CLIENT -q "insert into t values (1)" + $CLICKHOUSE_CLIENT -q "insert into t values (2)" + $CLICKHOUSE_CLIENT -q "select * from t order by n" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "alter table t + delete where n global in (select * from (select * from t where n global in (1::Int32)))" + $CLICKHOUSE_CLIENT -q "select * from t order by n" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "alter table t + delete where n global in (select t1.n from t as t1 full join t as t2 on t1.n=t2.n where t1.n global in (select 2::Int32))" + $CLICKHOUSE_CLIENT -q "select count() from t" + $CLICKHOUSE_CLIENT -q "drop table t" + + $CLICKHOUSE_CLIENT -q "drop table if exists test" + $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" + $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" + if [[ $engine == *"ReplicatedMergeTree"* ]]; then + $CLICKHOUSE_CLIENT -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "statement with subquery may be nondeterministic" + $CLICKHOUSE_CLIENT --allow_nondeterministic_mutations=1 --mutations_sync=1 -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] 
WHERE 1" + elif [[ $engine == *"Join"* ]]; then + $CLICKHOUSE_CLIENT -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" 2>&1| grep -Fa "DB::Exception: " | grep -Fv "Table engine Join supports only DELETE mutations" + else + $CLICKHOUSE_CLIENT --mutations_sync=1 -q "ALTER TABLE test + UPDATE test = (SELECT groupArray(id) FROM t1 GROUP BY 1)[n - 99] WHERE 1" + fi + $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" + $CLICKHOUSE_CLIENT -q "drop table test" +done diff --git a/tests/queries/0_stateless/01196_max_parser_depth.reference b/tests/queries/0_stateless/01196_max_parser_depth.reference index a72c1b18aa2..072fc270acd 100644 --- a/tests/queries/0_stateless/01196_max_parser_depth.reference +++ b/tests/queries/0_stateless/01196_max_parser_depth.reference @@ -1,3 +1,4 @@ Code: 306 Code: 306 Code: 306 +Code: 306 diff --git a/tests/queries/0_stateless/01236_graphite_mt.reference b/tests/queries/0_stateless/01236_graphite_mt.reference index a30d2495265..0f2e8e81377 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.reference +++ b/tests/queries/0_stateless/01236_graphite_mt.reference @@ -342,3 +342,347 @@ 2 sum_2 98950 1 940 2 sum_2 108950 1 1040 2 sum_2 70170 1 1140 +1 max_1 9 1 0 +1 max_1 19 1 10 +1 max_1 29 1 20 +1 max_1 39 1 30 +1 max_1 49 1 40 +1 max_1 59 1 50 +1 max_1 69 1 60 +1 max_1 79 1 70 +1 max_1 89 1 80 +1 max_1 99 1 90 +1 max_1 109 1 100 +1 max_1 119 1 110 +1 max_1 129 1 120 +1 max_1 139 1 130 +1 max_1 149 1 140 +1 max_1 159 1 150 +1 max_1 169 1 160 +1 max_1 179 1 170 +1 max_1 189 1 180 +1 max_1 199 1 190 +1 max_1 209 1 200 +1 max_1 219 1 210 +1 max_1 229 1 220 +1 max_1 239 1 230 +1 max_1 249 1 240 +1 max_1 259 1 250 +1 max_1 269 1 260 +1 max_1 279 1 270 +1 max_1 289 1 280 +1 max_1 299 1 290 +1 max_1 39 1 0 +1 max_1 139 1 40 +1 max_1 239 1 140 +1 max_1 339 1 240 +1 max_1 439 1 340 +1 max_1 539 1 440 +1 max_1 639 1 540 +1 max_1 739 1 640 +1 max_1 839 1 740 +1 max_1 939 1 840 +1 max_1 1039 1 940 +1 max_1 1139 1 1040 +1 max_1 1199 1 1140 +1 max_2 9 1 0 +1 max_2 19 1 10 +1 max_2 29 1 20 +1 max_2 39 1 30 +1 max_2 49 1 40 +1 max_2 59 1 50 +1 max_2 69 1 60 +1 max_2 79 1 70 +1 max_2 89 1 80 +1 max_2 99 1 90 +1 max_2 109 1 100 +1 max_2 119 1 110 +1 max_2 129 1 120 +1 max_2 139 1 130 +1 max_2 149 1 140 +1 max_2 159 1 150 +1 max_2 169 1 160 +1 max_2 179 1 170 +1 max_2 189 1 180 +1 max_2 199 1 190 +1 max_2 209 1 200 +1 max_2 219 1 210 +1 max_2 229 1 220 +1 max_2 239 1 230 +1 max_2 249 1 240 +1 max_2 259 1 250 +1 max_2 269 1 260 +1 max_2 279 1 270 +1 max_2 289 1 280 +1 max_2 299 1 290 +1 max_2 39 1 0 +1 max_2 139 1 40 +1 max_2 239 1 140 +1 max_2 339 1 240 +1 max_2 439 1 340 +1 max_2 539 1 440 +1 max_2 639 1 540 +1 max_2 739 1 640 +1 max_2 839 1 740 +1 max_2 939 1 840 +1 max_2 1039 1 940 +1 max_2 1139 1 1040 +1 max_2 1199 1 1140 +1 sum_1 45 1 0 +1 sum_1 145 1 10 +1 sum_1 245 1 20 +1 sum_1 345 1 30 +1 sum_1 445 1 40 +1 sum_1 545 1 50 +1 sum_1 645 1 60 +1 sum_1 745 1 70 +1 sum_1 845 1 80 +1 sum_1 945 1 90 +1 sum_1 1045 1 100 +1 sum_1 1145 1 110 +1 sum_1 1245 1 120 +1 sum_1 1345 1 130 +1 sum_1 1445 1 140 +1 sum_1 1545 1 150 +1 sum_1 1645 1 160 +1 sum_1 1745 1 170 +1 sum_1 1845 1 180 +1 sum_1 1945 1 190 +1 sum_1 2045 1 200 +1 sum_1 2145 1 210 +1 sum_1 2245 1 220 +1 sum_1 2345 1 230 +1 sum_1 2445 1 240 +1 sum_1 2545 1 250 +1 sum_1 2645 1 260 +1 sum_1 2745 1 270 +1 sum_1 2845 1 280 +1 sum_1 2945 1 290 +1 sum_1 780 1 0 +1 sum_1 8950 1 40 +1 sum_1 18950 1 140 +1 sum_1 28950 1 240 +1 sum_1 38950 1 340 +1 sum_1 48950 1 440 +1 sum_1 
58950 1 540 +1 sum_1 68950 1 640 +1 sum_1 78950 1 740 +1 sum_1 88950 1 840 +1 sum_1 98950 1 940 +1 sum_1 108950 1 1040 +1 sum_1 70170 1 1140 +1 sum_2 45 1 0 +1 sum_2 145 1 10 +1 sum_2 245 1 20 +1 sum_2 345 1 30 +1 sum_2 445 1 40 +1 sum_2 545 1 50 +1 sum_2 645 1 60 +1 sum_2 745 1 70 +1 sum_2 845 1 80 +1 sum_2 945 1 90 +1 sum_2 1045 1 100 +1 sum_2 1145 1 110 +1 sum_2 1245 1 120 +1 sum_2 1345 1 130 +1 sum_2 1445 1 140 +1 sum_2 1545 1 150 +1 sum_2 1645 1 160 +1 sum_2 1745 1 170 +1 sum_2 1845 1 180 +1 sum_2 1945 1 190 +1 sum_2 2045 1 200 +1 sum_2 2145 1 210 +1 sum_2 2245 1 220 +1 sum_2 2345 1 230 +1 sum_2 2445 1 240 +1 sum_2 2545 1 250 +1 sum_2 2645 1 260 +1 sum_2 2745 1 270 +1 sum_2 2845 1 280 +1 sum_2 2945 1 290 +1 sum_2 780 1 0 +1 sum_2 8950 1 40 +1 sum_2 18950 1 140 +1 sum_2 28950 1 240 +1 sum_2 38950 1 340 +1 sum_2 48950 1 440 +1 sum_2 58950 1 540 +1 sum_2 68950 1 640 +1 sum_2 78950 1 740 +1 sum_2 88950 1 840 +1 sum_2 98950 1 940 +1 sum_2 108950 1 1040 +1 sum_2 70170 1 1140 +2 max_1 9 1 0 +2 max_1 19 1 10 +2 max_1 29 1 20 +2 max_1 39 1 30 +2 max_1 49 1 40 +2 max_1 59 1 50 +2 max_1 69 1 60 +2 max_1 79 1 70 +2 max_1 89 1 80 +2 max_1 99 1 90 +2 max_1 109 1 100 +2 max_1 119 1 110 +2 max_1 129 1 120 +2 max_1 139 1 130 +2 max_1 149 1 140 +2 max_1 159 1 150 +2 max_1 169 1 160 +2 max_1 179 1 170 +2 max_1 189 1 180 +2 max_1 199 1 190 +2 max_1 209 1 200 +2 max_1 219 1 210 +2 max_1 229 1 220 +2 max_1 239 1 230 +2 max_1 249 1 240 +2 max_1 259 1 250 +2 max_1 269 1 260 +2 max_1 279 1 270 +2 max_1 289 1 280 +2 max_1 299 1 290 +2 max_1 39 1 0 +2 max_1 139 1 40 +2 max_1 239 1 140 +2 max_1 339 1 240 +2 max_1 439 1 340 +2 max_1 539 1 440 +2 max_1 639 1 540 +2 max_1 739 1 640 +2 max_1 839 1 740 +2 max_1 939 1 840 +2 max_1 1039 1 940 +2 max_1 1139 1 1040 +2 max_1 1199 1 1140 +2 max_2 9 1 0 +2 max_2 19 1 10 +2 max_2 29 1 20 +2 max_2 39 1 30 +2 max_2 49 1 40 +2 max_2 59 1 50 +2 max_2 69 1 60 +2 max_2 79 1 70 +2 max_2 89 1 80 +2 max_2 99 1 90 +2 max_2 109 1 100 +2 max_2 119 1 110 +2 max_2 129 1 120 +2 max_2 139 1 130 +2 max_2 149 1 140 +2 max_2 159 1 150 +2 max_2 169 1 160 +2 max_2 179 1 170 +2 max_2 189 1 180 +2 max_2 199 1 190 +2 max_2 209 1 200 +2 max_2 219 1 210 +2 max_2 229 1 220 +2 max_2 239 1 230 +2 max_2 249 1 240 +2 max_2 259 1 250 +2 max_2 269 1 260 +2 max_2 279 1 270 +2 max_2 289 1 280 +2 max_2 299 1 290 +2 max_2 39 1 0 +2 max_2 139 1 40 +2 max_2 239 1 140 +2 max_2 339 1 240 +2 max_2 439 1 340 +2 max_2 539 1 440 +2 max_2 639 1 540 +2 max_2 739 1 640 +2 max_2 839 1 740 +2 max_2 939 1 840 +2 max_2 1039 1 940 +2 max_2 1139 1 1040 +2 max_2 1199 1 1140 +2 sum_1 45 1 0 +2 sum_1 145 1 10 +2 sum_1 245 1 20 +2 sum_1 345 1 30 +2 sum_1 445 1 40 +2 sum_1 545 1 50 +2 sum_1 645 1 60 +2 sum_1 745 1 70 +2 sum_1 845 1 80 +2 sum_1 945 1 90 +2 sum_1 1045 1 100 +2 sum_1 1145 1 110 +2 sum_1 1245 1 120 +2 sum_1 1345 1 130 +2 sum_1 1445 1 140 +2 sum_1 1545 1 150 +2 sum_1 1645 1 160 +2 sum_1 1745 1 170 +2 sum_1 1845 1 180 +2 sum_1 1945 1 190 +2 sum_1 2045 1 200 +2 sum_1 2145 1 210 +2 sum_1 2245 1 220 +2 sum_1 2345 1 230 +2 sum_1 2445 1 240 +2 sum_1 2545 1 250 +2 sum_1 2645 1 260 +2 sum_1 2745 1 270 +2 sum_1 2845 1 280 +2 sum_1 2945 1 290 +2 sum_1 780 1 0 +2 sum_1 8950 1 40 +2 sum_1 18950 1 140 +2 sum_1 28950 1 240 +2 sum_1 38950 1 340 +2 sum_1 48950 1 440 +2 sum_1 58950 1 540 +2 sum_1 68950 1 640 +2 sum_1 78950 1 740 +2 sum_1 88950 1 840 +2 sum_1 98950 1 940 +2 sum_1 108950 1 1040 +2 sum_1 70170 1 1140 +2 sum_2 45 1 0 +2 sum_2 145 1 10 +2 sum_2 245 1 20 +2 sum_2 345 1 30 +2 sum_2 445 1 40 +2 sum_2 545 1 50 +2 sum_2 645 1 60 
+2 sum_2 745 1 70 +2 sum_2 845 1 80 +2 sum_2 945 1 90 +2 sum_2 1045 1 100 +2 sum_2 1145 1 110 +2 sum_2 1245 1 120 +2 sum_2 1345 1 130 +2 sum_2 1445 1 140 +2 sum_2 1545 1 150 +2 sum_2 1645 1 160 +2 sum_2 1745 1 170 +2 sum_2 1845 1 180 +2 sum_2 1945 1 190 +2 sum_2 2045 1 200 +2 sum_2 2145 1 210 +2 sum_2 2245 1 220 +2 sum_2 2345 1 230 +2 sum_2 2445 1 240 +2 sum_2 2545 1 250 +2 sum_2 2645 1 260 +2 sum_2 2745 1 270 +2 sum_2 2845 1 280 +2 sum_2 2945 1 290 +2 sum_2 780 1 0 +2 sum_2 8950 1 40 +2 sum_2 18950 1 140 +2 sum_2 28950 1 240 +2 sum_2 38950 1 340 +2 sum_2 48950 1 440 +2 sum_2 58950 1 540 +2 sum_2 68950 1 640 +2 sum_2 78950 1 740 +2 sum_2 88950 1 840 +2 sum_2 98950 1 940 +2 sum_2 108950 1 1040 +2 sum_2 70170 1 1140 diff --git a/tests/queries/0_stateless/01236_graphite_mt.sql b/tests/queries/0_stateless/01236_graphite_mt.sql index a6dd4b8b6fb..0ec905fa0a8 100644 --- a/tests/queries/0_stateless/01236_graphite_mt.sql +++ b/tests/queries/0_stateless/01236_graphite_mt.sql @@ -32,6 +32,8 @@ WITH dates AS select 1, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200) union all select 2, 'max_2', older_date - number * 60 - 30, number, 1, number from dates, numbers(1200); +select key, Path, Value, Version, col from test_graphite final order by key, Path, Time desc; + optimize table test_graphite final; select key, Path, Value, Version, col from test_graphite order by key, Path, Time desc; diff --git a/tests/queries/0_stateless/01300_read_wkt.sql b/tests/queries/0_stateless/01300_read_wkt.sql index 8121bdf6084..1995c5153d7 100644 --- a/tests/queries/0_stateless/01300_read_wkt.sql +++ b/tests/queries/0_stateless/01300_read_wkt.sql @@ -1,14 +1,14 @@ -SELECT readWktPoint('POINT(0 0)'); -SELECT readWktPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); -SELECT readWktPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); -SELECT readWktMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); +SELECT readWKTPoint('POINT(0 0)'); +SELECT readWKTPolygon('POLYGON((1 0,10 0,10 10,0 10,1 0))'); +SELECT readWKTPolygon('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))'); +SELECT readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('POINT(0 0)', 1); INSERT INTO geo VALUES ('POINT(1 0)', 2); INSERT INTO geo VALUES ('POINT(2 0)', 3); -SELECT readWktPoint(s) FROM geo ORDER BY id; +SELECT readWKTPoint(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); @@ -18,13 +18,13 @@ INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0))', 3); INSERT INTO geo VALUES ('POLYGON((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4))', 4); INSERT INTO geo VALUES ('POLYGON((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4))', 5); INSERT INTO geo VALUES ('POLYGON((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4))', 6); -SELECT readWktPolygon(s) FROM geo ORDER BY id; +SELECT readWKTPolygon(s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (s String, id Int) engine=Memory(); INSERT INTO geo VALUES ('MULTIPOLYGON(((1 0,10 0,10 10,0 10,1 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 1); INSERT INTO geo VALUES ('MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 2); INSERT INTO geo VALUES ('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 
5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))', 3); -SELECT readWktMultiPolygon(s) FROM geo ORDER BY id; +SELECT readWKTMultiPolygon(s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01300_svg.sql b/tests/queries/0_stateless/01300_svg.sql index a1deb1745c3..cf794f2190b 100644 --- a/tests/queries/0_stateless/01300_svg.sql +++ b/tests/queries/0_stateless/01300_svg.sql @@ -1,50 +1,50 @@ -SELECT svg((0., 0.)); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)]); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); -SELECT svg((0., 0.), 'b'); -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); +SELECT SVG((0., 0.)); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)]); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]]); +SELECT SVG((0., 0.), 'b'); +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b'); +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], 'b'); +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b'); DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Tuple(Float64, Float64), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ((0., 0.), 'b', 1); INSERT INTO geo VALUES ((1., 0.), 'c', 2); INSERT INTO geo VALUES ((2., 0.), 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg((0., 0.), s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG((0., 0.), s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Tuple(Float64, Float64)), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([(0., 0.), (10, 0), (10, 10), (0, 10)], 'b', 1); INSERT INTO geo VALUES ([(1., 0.), (10, 0), (10, 10), (0, 10)], 'c', 2); INSERT INTO geo VALUES ([(2., 0.), (10, 0), (10, 10), (0, 10)], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Tuple(Float64, Float64))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'b', 1); INSERT INTO geo VALUES ([[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'c', 2); INSERT INTO geo VALUES ([[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4, 4), (5, 4), (5, 5), (4, 5)]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 
5)]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE IF EXISTS geo; CREATE TABLE geo (p Array(Array(Array(Tuple(Float64, Float64)))), s String, id Int) engine=Memory(); INSERT INTO geo VALUES ([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'b', 1); INSERT INTO geo VALUES ([[[(1., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'c', 2); INSERT INTO geo VALUES ([[[(2., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], 'd', 3); -SELECT svg(p) FROM geo ORDER BY id; -SELECT svg(p, 'b') FROM geo ORDER BY id; -SELECT svg([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; -SELECT svg(p, s) FROM geo ORDER BY id; +SELECT SVG(p) FROM geo ORDER BY id; +SELECT SVG(p, 'b') FROM geo ORDER BY id; +SELECT SVG([[[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]], [[(-10., -10.), (-10, -9), (-9, 10)]]], s) FROM geo ORDER BY id; +SELECT SVG(p, s) FROM geo ORDER BY id; DROP TABLE geo; diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index f88d890f33c..00c153ec915 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -5,14 +5,14 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT --xyzgarbage 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "Code: 552" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' xyzgarbage 2>&1 | grep -q "BAD_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external -xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' -cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "Bad arguments" && echo 'OK' || echo 'FAIL' +cat /etc/passwd | sed 's/:/\t/g' | $CLICKHOUSE_CLIENT --query="SELECT shell, count() AS c FROM passwd GROUP BY shell ORDER BY c DESC" --external --xyzgarbage --file=- --name=passwd --structure='login String, unused String, uid UInt16, gid UInt16, comment String, home String, shell String' 2>&1 | grep -q "UNRECOGNIZED_ARGUMENTS" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql index 7d0993c1bfc..1387206b882 100644 --- a/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql +++ b/tests/queries/0_stateless/01425_decimal_parse_big_negative_exponent.sql @@ -1,4 +1,4 @@ -SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 72 } +SELECT '-1E9-1E9-1E9-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '-1E9' AS x, toDecimal32(x, 0); -- { serverError 69 } SELECT '1E-9' AS x, toDecimal32(x, 0); SELECT '1E-8' AS x, toDecimal32(x, 0); diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference index 
index 52dea650ebc..e9b7db9d530 100644
--- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference
+++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.reference
@@ -1,10 +1,6 @@
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
-100 0 99 4950
+30 0 54 810
+30 0 54 810
+30 0 54 810
+30 0 54 810
+30 0 54 810
+30 0 54 810
diff --git a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh
index 6533eeb12f5..030ae017e71 100755
--- a/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh
+++ b/tests/queries/0_stateless/01509_check_many_parallel_quorum_inserts_long.sh
@@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-NUM_REPLICAS=10
+NUM_REPLICAS=6
for i in $(seq 1 $NUM_REPLICAS); do
    $CLICKHOUSE_CLIENT -n -q "
@@ -20,17 +20,17 @@ done
function thread {
    i=0 retries=300
    while [[ $i -lt $retries ]]; do # server can be dead
-        $CLICKHOUSE_CLIENT --insert_quorum 5 --insert_quorum_parallel 1 --query "INSERT INTO r$1 SELECT $2" && break
+        $CLICKHOUSE_CLIENT --insert_quorum 3 --insert_quorum_parallel 1 --query "INSERT INTO r$1 SELECT $2" && break
        ((++i))
        sleep 0.1
    done
}
for i in $(seq 1 $NUM_REPLICAS); do
-    for j in {0..9}; do
+    for j in {0..4}; do
        a=$((($i - 1) * 10 + $j))
-        # Note: making 100 connections simultaneously is a mini-DoS when server is build with sanitizers and CI environment is overloaded.
+        # Note: making 30 connections simultaneously is a mini-DoS when the server is built with sanitizers and the CI environment is overloaded.
        # That's why we repeat "socket timeout" errors.
thread $i $a 2>&1 | grep -v -P 'SOCKET_TIMEOUT|NETWORK_ERROR|^$' & done @@ -46,5 +46,5 @@ for i in $(seq 1 $NUM_REPLICAS); do done for i in $(seq 1 $NUM_REPLICAS); do - $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS r$i;" + $CLICKHOUSE_CLIENT -n -q "DROP TABLE IF EXISTS r$i SYNC;" done diff --git a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh index d63765fc179..c1d5c357308 100755 --- a/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh +++ b/tests/queries/0_stateless/01527_clickhouse_local_optimize.sh @@ -10,6 +10,6 @@ rm -rf "${WORKING_FOLDER_01527}" mkdir -p "${WORKING_FOLDER_01527}" # OPTIMIZE was crashing due to lack of temporary volume in local -${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" -- --path="${WORKING_FOLDER_01527}" +${CLICKHOUSE_LOCAL} --query "drop database if exists d; create database d; create table d.t engine MergeTree order by a as select 1 a; optimize table d.t final" --path="${WORKING_FOLDER_01527}" rm -rf "${WORKING_FOLDER_01527}" diff --git a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh index 8684582ad45..95ecbf09cf5 100755 --- a/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh +++ b/tests/queries/0_stateless/01528_clickhouse_local_prepare_parts.sh @@ -36,10 +36,10 @@ ATTACH TABLE local.data_csv (id UInt64, d Date, s String) Engine=File(CSV, '${WO EOF ## feed the table -${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "INSERT INTO local.test SELECT * FROM local.data_csv;" --path="${WORKING_FOLDER_01528}" ## check the parts were created -${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" -- --path="${WORKING_FOLDER_01528}" +${CLICKHOUSE_LOCAL} --query "SELECT * FROM local.test WHERE id < 10 ORDER BY id;" --path="${WORKING_FOLDER_01528}" ################# @@ -49,36 +49,36 @@ cat < "${WORKING_FOLDER_01528}/metadata/local/stdin.sql" ATTACH TABLE local.stdin (id UInt64, d Date, s String) Engine=File(CSV, stdin); EOF -cat < sorting steps should know about limit Limit 10 -MergingSorted -Limit 10 -MergeSorting -Limit 10 -PartialSorting +Sorting +Sorting Limit 10 -- filter push down -- > filter should be pushed down after aggregating @@ -108,9 +105,8 @@ Filter column: notEquals(y, 2) 1 0 1 1 > filter is pushed down before sorting steps -MergingSorted -MergeSorting -PartialSorting +Sorting +Sorting Filter column: and(notEquals(x, 0), notEquals(y, 0)) 1 2 1 1 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index b835bae0e27..de3d3ac3eb6 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -10,7 +10,7 @@ $CLICKHOUSE_CLIENT -q "select x + 1 from (select y + 2 as x from (select dummy + echo "> sipHash should be calculated after filtration" $CLICKHOUSE_CLIENT -q "explain actions = 1 select sum(x), sum(y) from (select sipHash64(number) as x, bitAnd(number, 1024) as y from numbers_mt(1000000000) limit 1000000000) where y = 0" | grep -o "FUNCTION sipHash64\|Filter column: equals" echo "> sorting steps should know about limit" -$CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number 
from numbers(500000000) order by -number) limit 10" | grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Limit 10"
+$CLICKHOUSE_CLIENT -q "explain actions = 1 select number from (select number from numbers(500000000) order by -number) limit 10" | grep -o "Sorting\|Limit 10"
echo "-- filter push down --"
echo "> filter should be pushed down after aggregating"
@@ -132,7 +132,7 @@ $CLICKHOUSE_CLIENT -q "
    select number % 2 as x, number % 3 as y from numbers(6) order by y desc
    ) where x != 0 and y != 0 settings enable_optimize_predicate_expression = 0" |
-    grep -o "MergingSorted\|MergeSorting\|PartialSorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))"
+    grep -o "Sorting\|Filter column: and(notEquals(x, 0), notEquals(y, 0))"
$CLICKHOUSE_CLIENT -q "
    select x, y from (
    select number % 2 as x, number % 3 as y from numbers(6) order by y desc
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference
index ad9b87b998d..da7d2fbe2bd 100644
--- a/tests/queries/0_stateless/01710_minmax_count_projection.reference
+++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference
@@ -3,3 +3,9 @@
1 9999 5000
0 9998 5000
1
+0
+0
+0
+0 9999
+0 9999
+3
diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql
index 58af11f01f7..b7077de1fe6 100644
--- a/tests/queries/0_stateless/01710_minmax_count_projection.sql
+++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql
@@ -15,3 +15,37 @@ select min(i), max(i), count() from d where _partition_value.1 = 10 group by _pa
select min(i) from d where 1 = _partition_value.1;
drop table d;
+
+drop table if exists no_final_mark;
+drop table if exists has_final_mark;
+drop table if exists mixed_final_mark;
+
+create table no_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10, write_final_mark = 0;
+create table has_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10, write_final_mark = 1;
+create table mixed_final_mark (i int, j int) engine MergeTree partition by i % 2 order by j settings index_granularity = 10;
+
+set max_rows_to_read = 100000;
+
+insert into no_final_mark select number, number from numbers(10000);
+insert into has_final_mark select number, number from numbers(10000);
+
+alter table mixed_final_mark attach partition 0 from no_final_mark;
+alter table mixed_final_mark attach partition 1 from has_final_mark;
+
+set max_rows_to_read = 2;
+
+select min(j) from no_final_mark;
+select min(j) from has_final_mark;
+select min(j) from mixed_final_mark;
+
+select min(j), max(j) from no_final_mark; -- {serverError TOO_MANY_ROWS}
+select min(j), max(j) from has_final_mark;
+
+set max_rows_to_read = 5001; -- one normal part 5000 + one minmax_count_projection part 1
+select min(j), max(j) from mixed_final_mark;
+
+-- The first primary key expression is the same as one of the partition columns
+drop table if exists t;
+create table t (server_date Date, something String) engine MergeTree partition by (toYYYYMM(server_date), server_date) order by (server_date, something);
+insert into t values ('2019-01-01', 'test1'), ('2019-02-01', 'test2'), ('2019-03-01', 'test3');
+select count() from t;
diff --git a/tests/queries/0_stateless/01710_projection_with_joins.reference b/tests/queries/0_stateless/01710_projection_with_joins.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git
a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql new file mode 100644 index 00000000000..fcd1c586fa3 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -0,0 +1,8 @@ +drop table if exists t; + +create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; + +select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 1; +select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 0; + +drop table t; diff --git a/tests/queries/0_stateless/01823_explain_json.reference b/tests/queries/0_stateless/01823_explain_json.reference index 5c7845a22d5..f75cd69dbf3 100644 --- a/tests/queries/0_stateless/01823_explain_json.reference +++ b/tests/queries/0_stateless/01823_explain_json.reference @@ -111,31 +111,3 @@ } ], "Limit": 3, --- - "Sort Description": [ - { - "Column": "number", - "Ascending": false, - "With Fill": false - }, - { - "Column": "plus(number, 1)", - "Ascending": true, - "With Fill": false - } - ], - "Limit": 3, --- - "Sort Description": [ - { - "Column": "number", - "Ascending": false, - "With Fill": false - }, - { - "Column": "plus(number, 1)", - "Ascending": true, - "With Fill": false - } - ], - "Limit": 3, diff --git a/tests/queries/0_stateless/01856_create_function.sql b/tests/queries/0_stateless/01856_create_function.sql index 10f87f4a3df..cdc4baad1af 100644 --- a/tests/queries/0_stateless/01856_create_function.sql +++ b/tests/queries/0_stateless/01856_create_function.sql @@ -4,7 +4,6 @@ CREATE FUNCTION 01856_test_function_0 AS (a, b, c) -> a * b * c; SELECT 01856_test_function_0(2, 3, 4); SELECT isConstant(01856_test_function_0(1, 2, 3)); DROP FUNCTION 01856_test_function_0; -CREATE FUNCTION 01856_test_function_1 AS (a, b) -> a || b || c; --{serverError 47} CREATE FUNCTION 01856_test_function_1 AS (a, b) -> 01856_test_function_1(a, b) + 01856_test_function_1(a, b); --{serverError 611} CREATE FUNCTION cast AS a -> a + 1; --{serverError 609} CREATE FUNCTION sum AS (a, b) -> a + b; --{serverError 609} diff --git a/tests/queries/0_stateless/01888_read_int_safe.sql b/tests/queries/0_stateless/01888_read_int_safe.sql index 3caa4878aba..3aea8e38ab0 100644 --- a/tests/queries/0_stateless/01888_read_int_safe.sql +++ b/tests/queries/0_stateless/01888_read_int_safe.sql @@ -3,8 +3,8 @@ select toInt64('+-1'); -- { serverError 72; } select toInt64('++1'); -- { serverError 72; } select toInt64('++'); -- { serverError 72; } select toInt64('+'); -- { serverError 72; } -select toInt64('1+1'); -- { serverError 72; } -select toInt64('1-1'); -- { serverError 72; } +select toInt64('1+1'); -- { serverError 6; } +select toInt64('1-1'); -- { serverError 6; } select toInt64(''); -- { serverError 32; } select toInt64('1'); select toInt64('-1'); diff --git a/tests/queries/0_stateless/01889_sql_json_functions.sql b/tests/queries/0_stateless/01889_sql_json_functions.sql index 0c4f3aff9a3..087f029e635 100644 --- a/tests/queries/0_stateless/01889_sql_json_functions.sql +++ b/tests/queries/0_stateless/01889_sql_json_functions.sql @@ -1,46 +1,46 @@ -- Tags: no-fasttest SELECT '--JSON_VALUE--'; -SELECT JSON_VALUE('$', '{"hello":1}'); -- root is a complex object => default value (empty string) -SELECT JSON_VALUE('$.hello', '{"hello":1}'); -SELECT JSON_VALUE('$.hello', '{"hello":1.2}'); -SELECT JSON_VALUE('$.hello', '{"hello":true}'); -SELECT JSON_VALUE('$.hello', 
'{"hello":"world"}'); -SELECT JSON_VALUE('$.hello', '{"hello":null}'); -SELECT JSON_VALUE('$.hello', '{"hello":["world","world2"]}'); -SELECT JSON_VALUE('$.hello', '{"hello":{"world":"!"}}'); -SELECT JSON_VALUE('$.hello', '{hello:world}'); -- invalid json => default value (empty string) -SELECT JSON_VALUE('$.hello', ''); +SELECT JSON_VALUE('{"hello":1}', '$'); -- root is a complex object => default value (empty string) +SELECT JSON_VALUE('{"hello":1}', '$.hello'); +SELECT JSON_VALUE('{"hello":1.2}', '$.hello'); +SELECT JSON_VALUE('{"hello":true}', '$.hello'); +SELECT JSON_VALUE('{"hello":"world"}', '$.hello'); +SELECT JSON_VALUE('{"hello":null}', '$.hello'); +SELECT JSON_VALUE('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_VALUE('{hello:world}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_VALUE('', '$.hello'); SELECT '--JSON_QUERY--'; -SELECT JSON_QUERY('$', '{"hello":1}'); -SELECT JSON_QUERY('$.hello', '{"hello":1}'); -SELECT JSON_QUERY('$.hello', '{"hello":1.2}'); -SELECT JSON_QUERY('$.hello', '{"hello":true}'); -SELECT JSON_QUERY('$.hello', '{"hello":"world"}'); -SELECT JSON_QUERY('$.hello', '{"hello":null}'); -SELECT JSON_QUERY('$.hello', '{"hello":["world","world2"]}'); -SELECT JSON_QUERY('$.hello', '{"hello":{"world":"!"}}'); -SELECT JSON_QUERY('$.hello', '{hello:{"world":"!"}}}'); -- invalid json => default value (empty string) -SELECT JSON_QUERY('$.hello', ''); -SELECT JSON_QUERY('$.array[*][0 to 2, 4]', '{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}'); +SELECT JSON_QUERY('{"hello":1}', '$'); +SELECT JSON_QUERY('{"hello":1}', '$.hello'); +SELECT JSON_QUERY('{"hello":1.2}', '$.hello'); +SELECT JSON_QUERY('{"hello":true}', '$.hello'); +SELECT JSON_QUERY('{"hello":"world"}', '$.hello'); +SELECT JSON_QUERY('{"hello":null}', '$.hello'); +SELECT JSON_QUERY('{"hello":["world","world2"]}', '$.hello'); +SELECT JSON_QUERY('{"hello":{"world":"!"}}', '$.hello'); +SELECT JSON_QUERY( '{hello:{"world":"!"}}}', '$.hello'); -- invalid json => default value (empty string) +SELECT JSON_QUERY('', '$.hello'); +SELECT JSON_QUERY('{"array":[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]]}', '$.array[*][0 to 2, 4]'); SELECT '--JSON_EXISTS--'; -SELECT JSON_EXISTS('$', '{"hello":1}'); -SELECT JSON_EXISTS('$', ''); -SELECT JSON_EXISTS('$', '{}'); -SELECT JSON_EXISTS('$.hello', '{"hello":1}'); -SELECT JSON_EXISTS('$.world', '{"hello":1,"world":2}'); -SELECT JSON_EXISTS('$.world', '{"hello":{"world":1}}'); -SELECT JSON_EXISTS('$.hello.world', '{"hello":{"world":1}}'); -SELECT JSON_EXISTS('$.hello', '{hello:world}'); -- invalid json => default value (zero integer) -SELECT JSON_EXISTS('$.hello', ''); -SELECT JSON_EXISTS('$.hello[*]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.hello[0]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.hello[1]', '{"hello":["world"]}'); -SELECT JSON_EXISTS('$.a[*].b', '{"a":[{"b":1},{"c":2}]}'); -SELECT JSON_EXISTS('$.a[*].f', '{"a":[{"b":1},{"c":2}]}'); -SELECT JSON_EXISTS('$.a[*][0].h', '{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}'); +SELECT JSON_EXISTS('{"hello":1}', '$'); +SELECT JSON_EXISTS('', '$'); +SELECT JSON_EXISTS('{}', '$'); +SELECT JSON_EXISTS('{"hello":1}', '$.hello'); +SELECT JSON_EXISTS('{"hello":1,"world":2}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.world'); +SELECT JSON_EXISTS('{"hello":{"world":1}}', '$.hello.world'); +SELECT JSON_EXISTS('{hello:world}', '$.hello'); -- invalid json => default value (zero integer) +SELECT JSON_EXISTS('', 
'$.hello'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); +SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[1]'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].b'); +SELECT JSON_EXISTS('{"a":[{"b":1},{"c":2}]}', '$.a[*].f'); +SELECT JSON_EXISTS('{"a":[[{"b":1}, {"g":1}],[{"h":1},{"y":1}]]}', '$.a[*][0].h'); SELECT '--MANY ROWS--'; DROP TABLE IF EXISTS 01889_sql_json; @@ -48,5 +48,5 @@ CREATE TABLE 01889_sql_json (id UInt8, json String) ENGINE = MergeTree ORDER BY INSERT INTO 01889_sql_json(id, json) VALUES(0, '{"name":"Ivan","surname":"Ivanov","friends":["Vasily","Kostya","Artyom"]}'); INSERT INTO 01889_sql_json(id, json) VALUES(1, '{"name":"Katya","surname":"Baltica","friends":["Tihon","Ernest","Innokentiy"]}'); INSERT INTO 01889_sql_json(id, json) VALUES(2, '{"name":"Vitali","surname":"Brown","friends":["Katya","Anatoliy","Ivan","Oleg"]}'); -SELECT id, JSON_QUERY('$.friends[0 to 2]', json) FROM 01889_sql_json ORDER BY id; +SELECT id, JSON_QUERY(json, '$.friends[0 to 2]') FROM 01889_sql_json ORDER BY id; DROP TABLE 01889_sql_json; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.reference b/tests/queries/0_stateless/01915_json_extract_raw_string.reference index 3a41f35710c..e88c7e018d2 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.reference +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.reference @@ -1,2 +1,5 @@ ('123','456','[7,8,9]') \N +123 +123 + diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index 884c599c206..98bff692d71 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -3,3 +3,7 @@ select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); with '{"string_value":null}' as json select JSONExtract(json, 'string_value', 'Nullable(String)'); + +select JSONExtractString('{"a": 123}', 'a'); +select JSONExtractString('{"a": "123"}', 'a'); +select JSONExtractString('{"a": null}', 'a'); diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference b/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/01939_network_send_bytes_metrics.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh b/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh deleted file mode 100755 index e862a273de4..00000000000 --- a/tests/queries/0_stateless/01939_network_send_bytes_metrics.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" - -${CLICKHOUSE_CLIENT} --query "SELECT number FROM numbers(1000)" > /dev/null - -${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; - WITH ProfileEvents['NetworkSendBytes'] AS bytes - SELECT bytes >= 8000 AND bytes < 9000 ? 
1 : bytes FROM system.query_log - WHERE current_database = currentDatabase() AND query_kind = 'Select' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" - -${CLICKHOUSE_CLIENT} --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index ca3bbc4cb33..d0e7a9ef15b 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -2,31 +2,27 @@ explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union Limit (preliminary LIMIT (with OFFSET)) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index 10787068f43..b23631395ff 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference 
+++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -50,66 +50,58 @@ SettingQuotaAndLimits (Set limits and quota after reading from storage) explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized Expression (Projection) Distinct - MergingSorted (Merge sorted streams for ORDER BY, without aggregation) + Sorting (Merge sorted streams for ORDER BY, without aggregation) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized Expression (Projection) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union Distinct - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Distinct (Preliminary DISTINCT) - Expression (Before ORDER BY) + Sorting (Sorting for ORDER BY) + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized Expression (Projection) LimitBy Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY, without aggregation) + Sorting (Merge sorted streams for ORDER BY, without aggregation) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union LimitBy Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) - Expression (Before ORDER BY) - SettingQuotaAndLimits (Set limits 
and quota after reading from storage) - Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) -explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized -Expression (Projection) - MergingSorted (Merge sorted streams after aggregation stage for ORDER BY) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - Union - LimitBy - Expression (Before LIMIT BY) - MergingSorted (Merge sorted streams for ORDER BY) - MergeSorting (Merge sorted blocks for ORDER BY) - PartialSorting (Sort each block for ORDER BY) + Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) SettingQuotaAndLimits (Set limits and quota after reading from storage) ReadFromStorage (SystemNumbers) + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Projection) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Union + LimitBy + Expression (Before LIMIT BY) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + Expression ((Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY)))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/02027_ngrams.reference b/tests/queries/0_stateless/02027_ngrams.reference index 4ddc2a96c42..4c6afcdf02c 100644 --- a/tests/queries/0_stateless/02027_ngrams.reference +++ b/tests/queries/0_stateless/02027_ngrams.reference @@ -1,20 +1,20 @@ ['T','e','s','t'] ['Te','es','st'] ['Tes','est'] -['Tes','est'] +['Test'] [] ['T','e','s','t'] ['Te','es','st'] ['Tes','est'] -['Tes','est'] +['Test'] [] ['T','e','s','t'] ['Te','es','st'] ['Tes','est'] -['Tes','est'] +['Test'] [] ['T','e','s','t'] ['Te','es','st'] ['Tes','est'] -['Tes','est'] +['Test'] [] diff --git a/tests/queries/0_stateless/02027_ngrams.sql b/tests/queries/0_stateless/02027_ngrams.sql index b2ea9facf64..b9ce36272d8 100644 --- a/tests/queries/0_stateless/02027_ngrams.sql +++ b/tests/queries/0_stateless/02027_ngrams.sql @@ -1,23 +1,23 @@ SELECT ngrams('Test', 1); SELECT ngrams('Test', 2); SELECT ngrams('Test', 3); -SELECT ngrams('Test', 3); +SELECT ngrams('Test', 4); SELECT ngrams('Test', 5); SELECT ngrams(materialize('Test'), 1); SELECT ngrams(materialize('Test'), 2); SELECT ngrams(materialize('Test'), 3); -SELECT ngrams(materialize('Test'), 3); +SELECT ngrams(materialize('Test'), 4); SELECT ngrams(materialize('Test'), 5); SELECT ngrams(toFixedString('Test', 4), 1); SELECT ngrams(toFixedString('Test', 4), 2); SELECT ngrams(toFixedString('Test', 4), 3); -SELECT ngrams(toFixedString('Test', 4), 3); +SELECT 
ngrams(toFixedString('Test', 4), 4); SELECT ngrams(toFixedString('Test', 4), 5); SELECT ngrams(materialize(toFixedString('Test', 4)), 1); SELECT ngrams(materialize(toFixedString('Test', 4)), 2); SELECT ngrams(materialize(toFixedString('Test', 4)), 3); -SELECT ngrams(materialize(toFixedString('Test', 4)), 3); +SELECT ngrams(materialize(toFixedString('Test', 4)), 4); SELECT ngrams(materialize(toFixedString('Test', 4)), 5); diff --git a/tests/queries/0_stateless/02030_capnp_format.reference b/tests/queries/0_stateless/02030_capnp_format.reference new file mode 100644 index 00000000000..8c3c81b5bc3 --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.reference @@ -0,0 +1,53 @@ +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +-1 1 -1000 1000 -10000000 1000000 -1000000000 1000000000 123.123 123123123.12312312 Some string fixed Some data 2000-01-06 2000-06-01 19:42:42 2000-04-01 11:21:33.123 +1 (2,(3,4)) (((5))) +1 (2,(3,4)) (((5))) +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 [1,2,3] [[[1,2,3],[4,5,6]],[[7,8,9],[]],[]] +1 ((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +1 ((2,[[3,4],[5,6],[]]),[([[(7,8),(9,10)],[(11,12),(13,14)],[]],[([15,16,17]),([])])]) +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +[1,2,3] [[4,5,6],[],[7,8]] [(9,10),(11,12),(13,14)] +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +1 [1,NULL,2] (1) +\N [NULL,NULL,42] (NULL) +one +two +tHrEe +oNe +tWo +threE +first +second +third +OK +OK +OK +OK +one two ['one',NULL,'two',NULL] +two \N [NULL] +one two ['one',NULL,'two',NULL] +two \N [NULL] +0 1 2 +1 2 3 +2 3 4 +3 4 5 +4 5 6 +(0,(1,(2))) +(1,(2,(3))) +(2,(3,(4))) +(3,(4,(5))) +(4,(5,(6))) +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02030_capnp_format.sh b/tests/queries/0_stateless/02030_capnp_format.sh new file mode 100755 index 00000000000..aa2fe6c1b35 --- /dev/null +++ b/tests/queries/0_stateless/02030_capnp_format.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-replicated-database + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +CAPN_PROTO_FILE=$USER_FILES_PATH/data.capnp +touch $CAPN_PROTO_FILE + +SCHEMADIR=$(clickhouse-client --query "select * from file('data.capnp', 'CapnProto', 'val1 char') settings format_schema='nonexist:Message'" 2>&1 | grep Exception | grep -oP "file \K.*(?=/nonexist.capnp)") +CLIENT_SCHEMADIR=$CURDIR/format_schemas +SERVER_SCHEMADIR=test_02030 +mkdir -p $SCHEMADIR/$SERVER_SCHEMADIR +cp -r $CLIENT_SCHEMADIR/02030_* $SCHEMADIR/$SERVER_SCHEMADIR/ + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_simple_types"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_simple_types (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixed FixedString(5), data String, date Date, datetime DateTime, datetime64 DateTime64(3)) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types values (-1, 1, -1000, 1000, -10000000, 1000000, -1000000000, 1000000000, 123.123, 123123123.123123123, 'Some string', 'fixed', 'Some data', '2000-01-06', '2000-06-01 19:42:42', '2000-04-01 11:21:33.123')" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_simple_types FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_simple_types" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_simple_types" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_tuples (value UInt64, tuple1 Tuple(one UInt64, two Tuple(three UInt64, four UInt64)), tuple2 Tuple(nested1 Tuple(nested2 Tuple(x UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples VALUES (1, (2, (3, 4)), (((5))))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_lists" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_lists (value UInt64, list1 Array(UInt64), list2 Array(Array(Array(UInt64)))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists VALUES (1, [1, 2, 3], [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], []], []])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_lists FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_lists:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_lists" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_lists" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_lists_and_tuples (value UInt64, nested Tuple(a Tuple(b UInt64, c Array(Array(UInt64))), d Array(Tuple(e Array(Array(Tuple(f UInt64, g UInt64))), h Array(Tuple(k 
Array(UInt64))))))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples VALUES (1, ((2, [[3, 4], [5, 6], []]), [([[(7, 8), (9, 10)], [(11, 12), (13, 14)], []], [([15, 16, 17]), ([])])]))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_lists_and_tuples FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_lists_and_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_lists_and_tuples" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_lists_and_tuples" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nested_table" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nested_table (nested Nested(value UInt64, array Array(UInt64), tuple Tuple(one UInt64, two UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table VALUES ([1, 2, 3], [[4, 5, 6], [], [7, 8]], [(9, 10), (11, 12), (13, 14)])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nested_table FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_table:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nested_table" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nested_table" + + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_nullable" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_nullable (nullable Nullable(UInt64), array Array(Nullable(UInt64)), tuple Tuple(nullable Nullable(UInt64))) ENGINE=Memory"; +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable VALUES (1, [1, Null, 2], (1)), (Null, [Null, Null, 42], (Null))" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_nullable FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nullable:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_nullable" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_nullable" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(number, 'Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2)') AS value FROM numbers(3) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_enum:Message'" > $CAPN_PROTO_FILE + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'oNe\' = 1, \'tWo\' = 2, \'threE\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 0, \'second\' = 1, \'third\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'three\' = 2)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', 
format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 0, \'two\' = 1, \'tHrEe\' = 2, \'four\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'one\' = 1, \'two\' = 2, \'tHrEe\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_values'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'value Enum(\'first\' = 1, \'two\' = 2, \'three\' = 3)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_enum:Message', format_capn_proto_enum_comparising_mode='by_names_case_insensitive'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="CREATE TABLE capnp_low_cardinality (lc1 LowCardinality(String), lc2 LowCardinality(Nullable(String)), lc3 Array(LowCardinality(Nullable(String)))) ENGINE=Memory" +$CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality VALUES ('one', 'two', ['one', Null, 'two', Null]), ('two', Null, [Null])" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" | $CLICKHOUSE_CLIENT --query="INSERT INTO capnp_low_cardinality FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_low_cardinality:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM capnp_low_cardinality" +$CLICKHOUSE_CLIENT --query="DROP TABLE capnp_low_cardinality" + + +$CLICKHOUSE_CLIENT --query="SELECT CAST(tuple(number, tuple(number + 1, tuple(number + 2))), 'Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') AS a FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a_b UInt64, a_c_d UInt64, a_c_e_f UInt64') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a_b, number + 1 AS a_c_d, number + 2 AS a_c_e_f FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_nested_tuples:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(bb UInt64, c Tuple(d UInt64, e Tuple(f UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'a Tuple(b UInt64, c Tuple(d UInt64, e Tuple(ff UInt64)))') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_nested_tuples:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'string 
String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS uint64 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_simple_types:Message'" > $CAPN_PROTO_FILE +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 String') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Array(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Enum(\'one\' = 1)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Tuple(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Nullable(UInt64)') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT * FROM file('data.capnp', 'CapnProto', 'uint64 Int32') SETTINGS format_schema='$SERVER_SCHEMADIR/02030_capnp_simple_types:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + + +$CLICKHOUSE_CLIENT --query="SELECT number AS a, toString(number) as b FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_unnamed_union:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable1 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; +$CLICKHOUSE_CLIENT --query="SELECT toNullable(toString(number)) as nullable2 FROM numbers(5) FORMAT CapnProto SETTINGS format_schema='$CLIENT_SCHEMADIR/02030_capnp_fake_nullable:Message'" 2>&1 | grep -F -q "CAPN_PROTO_BAD_CAST" && echo 'OK' || echo 'FAIL'; + +rm $CAPN_PROTO_FILE +rm -rf ${SCHEMADIR:?}/${SERVER_SCHEMADIR:?} diff --git a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh index 2a887cbbcae..13cf013b53b 100755 --- a/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh +++ b/tests/queries/0_stateless/02033_join_engine_deadlock_long.sh @@ -29,7 +29,7 @@ populate_table_bg () { $CLICKHOUSE_CLIENT --query " INSERT INTO join_block_test SELECT toString(number) as id, number * number as num - FROM system.numbers LIMIT 3000000 + FROM system.numbers LIMIT 500000 " >/dev/null ) & } diff --git a/tests/queries/0_stateless/02035_isNull_isNotNull_format.reference b/tests/queries/0_stateless/02035_isNull_isNotNull_format.reference index 74fc4a44b5f..4964bc68fff 100644 --- a/tests/queries/0_stateless/02035_isNull_isNotNull_format.reference +++ b/tests/queries/0_stateless/02035_isNull_isNotNull_format.reference @@ -7,3 +7,8 @@ explain syntax select 
isNull(null); SELECT NULL IS NULL explain syntax select isNotNull(null); SELECT NULL IS NOT NULL +explain syntax select isNotNull(1)+isNotNull(2) from remote('127.2', system.one); +SELECT (1 IS NOT NULL) + (2 IS NOT NULL) +FROM remote(\'127.2\', \'system.one\') +select isNotNull(1)+isNotNull(2) from remote('127.2', system.one); +2 diff --git a/tests/queries/0_stateless/02035_isNull_isNotNull_format.sql b/tests/queries/0_stateless/02035_isNull_isNotNull_format.sql index eff658445a8..d9138e12bca 100644 --- a/tests/queries/0_stateless/02035_isNull_isNotNull_format.sql +++ b/tests/queries/0_stateless/02035_isNull_isNotNull_format.sql @@ -3,3 +3,5 @@ explain syntax select null is null; explain syntax select null is not null; explain syntax select isNull(null); explain syntax select isNotNull(null); +explain syntax select isNotNull(1)+isNotNull(2) from remote('127.2', system.one); +select isNotNull(1)+isNotNull(2) from remote('127.2', system.one); diff --git a/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference b/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference new file mode 100644 index 00000000000..b802026a0dd --- /dev/null +++ b/tests/queries/0_stateless/02047_log_family_data_file_sizes.reference @@ -0,0 +1,47 @@ +Log: +empty: +1 element: +1 a +__marks.mrk +sizes.json +x.bin +y.bin +3 elements: +1 a +22 bc +333 def +__marks.mrk greater size +sizes.json +x.bin greater size +y.bin greater size + +TinyLog: +empty: +1 element: +1 a +sizes.json +x.bin +y.bin +3 elements: +1 a +22 bc +333 def +sizes.json +x.bin greater size +y.bin greater size + +StripeLog: +empty: +1 element: +1 a +data.bin +index.mrk +sizes.json +3 elements: +1 a +22 bc +333 def +data.bin greater size +index.mrk greater size +sizes.json + diff --git a/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh b/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh new file mode 100755 index 00000000000..e7c1cb5d71e --- /dev/null +++ b/tests/queries/0_stateless/02047_log_family_data_file_sizes.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +declare -a engines=("Log" "TinyLog" "StripeLog") +for engine in "${engines[@]}" +do + echo "$engine:" + + $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS tbl" + $CLICKHOUSE_CLIENT --query="CREATE TABLE tbl(x UInt32, y String) ENGINE=$engine" + data_dir=$($CLICKHOUSE_CLIENT --query="SELECT data_paths[1] FROM system.tables WHERE name='tbl' AND database=currentDatabase()") + + echo "empty:" + find "$data_dir"* 2>/dev/null + + echo "1 element:" + $CLICKHOUSE_CLIENT --query="INSERT INTO tbl VALUES (1, 'a')" + $CLICKHOUSE_CLIENT --query="SELECT * FROM tbl ORDER BY x" + declare -A file_sizes + for name in $(find "$data_dir"* -print0 | xargs -0 -n 1 basename | sort); do + file_path=$data_dir$name + file_size=$(stat -c%s "$file_path") + file_sizes[$name]=$file_size + echo $name + done + + echo "3 elements:" + $CLICKHOUSE_CLIENT --query="INSERT INTO tbl VALUES (22, 'bc'), (333, 'def')" + $CLICKHOUSE_CLIENT --query="SELECT * FROM tbl ORDER BY x" + for name in $(find "$data_dir"* -print0 | xargs -0 -n 1 basename | sort); do + file_path=$data_dir$name + file_size=$(stat -c%s "$file_path") + old_file_size=${file_sizes[$name]} + if [ "$name" == "sizes.json" ]; then + cmp="" + elif (( file_size > old_file_size )); then + cmp="greater size" + else + cmp="unexpected size ($file_size, old_size=$old_file_size)" + fi + echo $name $cmp + done + + echo +done diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.reference b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference new file mode 100644 index 00000000000..44c39f2a444 --- /dev/null +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.reference @@ -0,0 +1,15 @@ +execute: default +"foo" +1 +execute: --stage fetch_columns +"dummy" +0 +execute: --stage with_mergeable_state +"1" +1 +execute: --stage with_mergeable_state_after_aggregation +"1" +1 +execute: --stage complete +"foo" +1 diff --git a/tests/queries/0_stateless/02048_clickhouse_local_stage.sh b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh new file mode 100755 index 00000000000..5c1303b5160 --- /dev/null +++ b/tests/queries/0_stateless/02048_clickhouse_local_stage.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function execute_query() +{ + if [ $# -eq 0 ]; then + echo "execute: default" + else + echo "execute: $*" + fi + ${CLICKHOUSE_LOCAL} "$@" --format CSVWithNames -q "SELECT 1 AS foo" +} + +execute_query # default -- complete +execute_query --stage fetch_columns +execute_query --stage with_mergeable_state +execute_query --stage with_mergeable_state_after_aggregation +execute_query --stage complete diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect new file mode 100755 index 00000000000..17b98b077d5 --- /dev/null +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -0,0 +1,53 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 20 +match_max 100000 + +# A default timeout action is to fail +expect_after { + timeout { + exit 1 + } + +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" +expect ":) " + +send -- "drop table if exists t\r" +expect "Ok." + +send -- "create table t engine=MergeTree() order by tuple() as select 1\r" +expect "Ok." 
+ +send -- "set optimize_on_insert = 0\r" +expect "Ok." + +send -- "drop table if exists tt\r" +expect "Ok." + +send -- "create table tt (date Date, version UInt64, val UInt64) engine = ReplacingMergeTree(version) partition by date order by date\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 2, 2), ('2020-01-01', 1, 1)\r" +expect "Ok." + +send -- "insert into tt values ('2020-01-01', 0, 0)\r" +expect "Ok." + +send -- "OPTIMIZE TABLE tt\r" +expect "Ok." + +send -- "select * from tt order by version format TSV\r" +expect "2020-01-01\t2\t2" + +send -- "drop table tt\r" +expect "Ok." +send -- "drop table t\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference new file mode 100644 index 00000000000..c84236dce7d --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.reference @@ -0,0 +1,60 @@ +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 80000 +9 90000 +10 100000 +11 110000 +12 120000 +13 130000 +14 140000 +15 150000 +16 160000 +17 170000 +18 180000 +19 190000 +0 0 +1 1 +2 2 +3 3 +4 40 +5 50 +6 60 +7 70 +8 800 +9 900 +10 1000 +11 1100 +12 12000 +13 13000 +14 14000 +15 15000 +16 160000 +17 170000 +18 180000 +19 190000 diff --git a/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql new file mode 100644 index 00000000000..2a837380250 --- /dev/null +++ b/tests/queries/0_stateless/02049_lowcardinality_shortcircuit_crash.sql @@ -0,0 +1,45 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/30231 +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10), + CAST(NULL, 'Nullable(UInt8)'), toString(number * 100), + CAST(NULL, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; + +SELECT * +FROM ( + SELECT number, + multiIf( + CAST(number < 4, 'UInt8'), toString(number), + CAST(number < 8, 'LowCardinality(UInt8)'), toString(number * 10)::LowCardinality(String), + CAST(number < 12, 'Nullable(UInt8)'), toString(number * 100)::Nullable(String), + CAST(number < 16, 'LowCardinality(Nullable(UInt8))'), toString(number * 1000)::LowCardinality(Nullable(String)), + toString(number * 10000)) as m + FROM system.numbers + LIMIT 20 + ) +ORDER BY number +SETTINGS short_circuit_function_evaluation='enable'; diff --git 
a/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh new file mode 100755 index 00000000000..7a92fa6fefe --- /dev/null +++ b/tests/queries/0_stateless/02050_clickhouse_local_parsing_exception.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query="SELECT number FROM system.numbers INTO OUTFILE test.native.zst FORMAT Native" 2>&1 | grep -q "Code: 62. DB::Exception: Syntax error: failed at position 48 ('test'): test.native.zst FORMAT Native. Expected string literal." && echo 'OK' || echo 'FAIL' ||: + diff --git a/tests/queries/0_stateless/02050_client_profile_events.reference b/tests/queries/0_stateless/02050_client_profile_events.reference new file mode 100644 index 00000000000..00fc3b5d06a --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.reference @@ -0,0 +1,4 @@ +0 +SelectedRows: 131010 (increment) +OK +OK diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh new file mode 100755 index 00000000000..5c3887cf5fb --- /dev/null +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# do not print any ProfileEvents packets +$CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' +# print only last +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5) format Null' |& grep -o 'SelectedRows: .*$' +# print everything +test "$($CLICKHOUSE_CLIENT --print-profile-events -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL +# print each 100 ms +test "$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=100 -q 'select * from numbers(1e9) format Null' |& grep -c 'SelectedRows')" -gt 1 && echo OK || echo FAIL diff --git a/tests/queries/0_stateless/02051_read_settings.reference.j2 b/tests/queries/0_stateless/02051_read_settings.reference.j2 new file mode 100644 index 00000000000..86aa67a9d2d --- /dev/null +++ b/tests/queries/0_stateless/02051_read_settings.reference.j2 @@ -0,0 +1,11 @@ +{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%} +{% for direct_io in [0, 1] -%} +{% for prefetch in [0, 1] -%} +{% for priority in [0, 1] -%} +{% for buffer_size in [65505, 1048576] -%} +1000000 +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} +{% endfor -%} diff --git a/tests/queries/0_stateless/02051_read_settings.sql.j2 b/tests/queries/0_stateless/02051_read_settings.sql.j2 new file mode 100644 index 00000000000..9f02274e732 --- /dev/null +++ b/tests/queries/0_stateless/02051_read_settings.sql.j2 @@ -0,0 +1,31 @@ +-- Tags: long +-- +-- Test for testing various read settings. 
+ +drop table if exists data_02051; + +create table data_02051 (key Int, value String) engine=MergeTree() order by key +as select number, repeat(toString(number), 5) from numbers(1e6); + +{# check each local_filesystem_read_method #} +{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] %} +{# check w/ O_DIRECT and w/o (min_bytes_to_use_direct_io) #} +{% for direct_io in [0, 1] %} +{# check local_filesystem_read_prefetch (just a smoke test) #} +{% for prefetch in [0, 1] %} +{# check read_priority (just a smoke test) #} +{% for priority in [0, 1] %} +{# check alignment for O_DIRECT with various max_read_buffer_size #} +{% for buffer_size in [65505, 1048576] %} +select count(ignore(*)) from data_02051 settings + min_bytes_to_use_direct_io={{ direct_io }}, + local_filesystem_read_method='{{ read_method }}', + local_filesystem_read_prefetch={{ prefetch }}, + read_priority={{ priority }}, + max_read_buffer_size={{ buffer_size }} +; +{% endfor %} +{% endfor %} +{% endfor %} +{% endfor %} +{% endfor %} diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.reference b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02051_symlinks_to_user_files.sh b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh new file mode 100755 index 00000000000..dfdc71e0f0b --- /dev/null +++ b/tests/queries/0_stateless/02051_symlinks_to_user_files.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# See 01658_read_file_to_string_column.sh +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +FILE_PATH="${user_files_path}/file/" +mkdir -p ${FILE_PATH} +chmod 777 ${FILE_PATH} + +FILE="test_symlink_${CLICKHOUSE_DATABASE}" + +symlink_path=${FILE_PATH}/${FILE} +file_path=$CUR_DIR/${FILE} + +touch ${file_path} +ln -s ${file_path} ${symlink_path} +chmod ugo+w ${symlink_path} + +function cleanup() +{ + rm ${symlink_path} ${file_path} +} +trap cleanup EXIT + +${CLICKHOUSE_CLIENT} --query="insert into table function file('${symlink_path}', 'Values', 'a String') select 'OK'"; +${CLICKHOUSE_CLIENT} --query="select * from file('${symlink_path}', 'Values', 'a String')"; + diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference new file mode 100644 index 00000000000..d7d3ee8f362 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.reference @@ -0,0 +1,8 @@ +1 +1 +10 +10 +100 +100 +10000 +10000 diff --git a/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 new file mode 100644 index 00000000000..465aa22beb3 --- /dev/null +++ b/tests/queries/0_stateless/02052_last_granula_adjust_LOGICAL_ERROR.sql.j2 @@ -0,0 +1,19 @@ +-- Tags: long + +{% for rows_in_table in [1, 10, 100, 10000] %} +{% for wide in [0, 100000000] %} +drop table if exists data_02052_{{ rows_in_table }}_wide{{ wide }}; +create table data_02052_{{ rows_in_table }}_wide{{ wide }} (key Int, value String) 
+engine=MergeTree() +order by key +settings + min_bytes_for_wide_part={{ wide }} +as select number, repeat(toString(number), 5) from numbers({{ rows_in_table }}); + +-- avoid any optimizations with ignore(*) +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=1, max_threads=1; +select count(ignore(*)) from data_02052_{{ rows_in_table }}_wide{{ wide }} settings max_read_buffer_size=0, max_threads=1; -- { serverError CANNOT_READ_ALL_DATA } + +drop table data_02052_{{ rows_in_table }}_wide{{ wide }}; +{% endfor %} +{% endfor %} diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference new file mode 100644 index 00000000000..c4c0901b9df --- /dev/null +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference @@ -0,0 +1,12 @@ +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK +OK diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh new file mode 100755 index 00000000000..d37155e8506 --- /dev/null +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2206 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_LOCAL} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + +${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + +${CLICKHOUSE_CLIENT} --query 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" + + + diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference new file mode 100644 index 00000000000..e8cc5e526c0 --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.reference @@ -0,0 +1,2 @@ +1 1 1 1 1 1 +1 diff --git a/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql new file mode 100644 index 00000000000..fc6be2eff7b --- /dev/null +++ b/tests/queries/0_stateless/02096_join_unusual_identifier_begin.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; + +CREATE TABLE t1 ( `a1` Int64, `1a1` Int64 ) ENGINE = Memory; +INSERT INTO t1 VALUES (1, 1); + +CREATE 
TABLE t2 ( `b1` Int64, `1b1` Int64 ) ENGINE = Memory; +INSERT INTO t2 VALUES (1, 1); + +CREATE TABLE t3 ( `c1` Int64, `1c1` Int64 ) ENGINE = Memory; +INSERT INTO t3 VALUES (1, 1); + +SELECT + * +FROM t1 AS t1 +INNER JOIN t2 AS t2 ON t1.a1 = t2.b1 +INNER JOIN t3 AS t3 ON t1.a1 = t3.c1; + +SELECT t2.`1b1` FROM t1 JOIN t2 ON a1 = b1; + +-- Without qualification it doesn't work: +-- SELECT `1b1` FROM t1 JOIN t2 ON a1 = b1; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.reference b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference new file mode 100644 index 00000000000..6d2e42f2ca6 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.reference @@ -0,0 +1,6 @@ +1 +AggregateFunction(uniqExact, Nullable(String)) +1 +AggregateFunction(uniqExact, Nullable(UInt8)) +1 +1 diff --git a/tests/queries/0_stateless/02097_initializeAggregationNullable.sql b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql new file mode 100644 index 00000000000..aa4e6d47579 --- /dev/null +++ b/tests/queries/0_stateless/02097_initializeAggregationNullable.sql @@ -0,0 +1,8 @@ +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable('foo'))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable('foo'))); + +SELECT finalizeAggregation(initializeAggregation('uniqExactState', toNullable(123))); +SELECT toTypeName(initializeAggregation('uniqExactState', toNullable(123))); + +SELECT initializeAggregation('uniqExactState', toNullable('foo')) = arrayReduce('uniqExactState', [toNullable('foo')]); +SELECT initializeAggregation('uniqExactState', toNullable(123)) = arrayReduce('uniqExactState', [toNullable(123)]); diff --git a/tests/queries/0_stateless/02097_remove_sample_by.reference b/tests/queries/0_stateless/02097_remove_sample_by.reference new file mode 100644 index 00000000000..0747bbd5d1f --- /dev/null +++ b/tests/queries/0_stateless/02097_remove_sample_by.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.t_remove_sample_by\n(\n `id` UInt64\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t_remove_sample_by\n(\n `id` UInt64\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/default/t_remove_sample_by\', \'1\')\nORDER BY id\nSETTINGS index_granularity = 8192 +CREATE TABLE default.t_remove_sample_by\n(\n `id` String\n)\nENGINE = MergeTree\nORDER BY id\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02097_remove_sample_by.sql b/tests/queries/0_stateless/02097_remove_sample_by.sql new file mode 100644 index 00000000000..89fbfe0c4c5 --- /dev/null +++ b/tests/queries/0_stateless/02097_remove_sample_by.sql @@ -0,0 +1,44 @@ +-- Tags: zookeeper + +DROP TABLE IF EXISTS t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) ENGINE = MergeTree ORDER BY id SAMPLE BY id; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; -- { serverError 36 } +SELECT * FROM t_remove_sample_by SAMPLE 1 / 10; -- { serverError 141 } + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/{database}/t_remove_sample_by', '1') +ORDER BY id SAMPLE BY id; + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id UInt64) ENGINE 
= Memory; +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; -- { serverError 36 } + +DROP TABLE t_remove_sample_by; + +CREATE TABLE t_remove_sample_by(id String) +ENGINE = MergeTree ORDER BY id SAMPLE BY id +SETTINGS check_sample_column_is_correct = 0; + +ALTER TABLE t_remove_sample_by RESET SETTING check_sample_column_is_correct; + +DETACH TABLE t_remove_sample_by; +ATTACH TABLE t_remove_sample_by; + +INSERT INTO t_remove_sample_by VALUES (1); +SELECT * FROM t_remove_sample_by SAMPLE 1 / 10; -- { serverError 59 } + +ALTER TABLE t_remove_sample_by REMOVE SAMPLE BY; +SHOW CREATE TABLE t_remove_sample_by; + +DROP TABLE t_remove_sample_by; diff --git a/tests/queries/0_stateless/02098_date32_comparison.reference b/tests/queries/0_stateless/02098_date32_comparison.reference new file mode 100644 index 00000000000..16d1f96acfd --- /dev/null +++ b/tests/queries/0_stateless/02098_date32_comparison.reference @@ -0,0 +1,19 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02098_date32_comparison.sql b/tests/queries/0_stateless/02098_date32_comparison.sql new file mode 100644 index 00000000000..5fd7172e0bb --- /dev/null +++ b/tests/queries/0_stateless/02098_date32_comparison.sql @@ -0,0 +1,19 @@ +select toDate32('1990-01-01') = toDate('1990-01-01'); +select toDate('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1925-01-01') <= toDate('1990-01-01'); +select toDate('1991-01-01') < toDate32('2283-11-11'); +select toDate32('1990-01-01') = toDateTime('1990-01-01'); +select toDateTime('1991-01-02') > toDate32('1990-01-01'); +select toDate32('1925-01-01') <= toDateTime('1990-01-01'); +select toDateTime('1991-01-01') < toDate32('2283-11-11'); +select toDate32('1990-01-01') = toDateTime64('1990-01-01',2); +select toDateTime64('1991-01-02',2) > toDate32('1990-01-01'); +select toDate32('1925-01-01') = toDateTime64('1925-01-01',2); +select toDateTime64('1925-01-02',2) > toDate32('1925-01-01'); +select toDate32('2283-11-11') = toDateTime64('2283-11-11',2); +select toDateTime64('2283-11-11',2) > toDate32('1925-01-01'); +select toDate32('1990-01-01') = '1990-01-01'; +select '1991-01-02' > toDate32('1990-01-01'); +select toDate32('1925-01-01') = '1925-01-01'; +select '2283-11-11' >= toDate32('2283-11-10'); +select '2283-11-11' > toDate32('1925-01-01'); \ No newline at end of file diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference new file mode 100644 index 00000000000..6e88bbad146 --- /dev/null +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.reference @@ -0,0 +1,66 @@ +Dictionary hashed_array_dictionary_simple_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 value_second_1 +2 value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 
value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 value_0 value_second_0 +1 value_1 \N +2 value_2 value_second_2 +Dictionary hashed_array_dictionary_simple_key_hierarchy +dictGet +0 +0 +1 +1 +2 +dictGetHierarchy +[1] +[4,2,1] diff --git a/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql new file mode 100644 index 00000000000..8d792836562 --- /dev/null +++ b/tests/queries/0_stateless/02098_hashed_array_dictionary_simple_key.sql @@ -0,0 +1,125 @@ +DROP TABLE IF EXISTS simple_key_simple_attributes_source_table; +CREATE TABLE simple_key_simple_attributes_source_table +( + id UInt64, + value_first String, + value_second String +) +ENGINE = TinyLog; + +INSERT INTO simple_key_simple_attributes_source_table VALUES(0, 'value_0', 'value_second_0'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(1, 'value_1', 'value_second_1'); +INSERT INTO simple_key_simple_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_simple_attributes; +CREATE DICTIONARY hashed_array_dictionary_simple_key_simple_attributes +( + id UInt64, + value_first String DEFAULT 'value_first_default', + value_second String DEFAULT 'value_second_default' +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'simple_key_simple_attributes_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_simple_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_simple_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_simple_key_simple_attributes', number) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_simple_key_simple_attributes ORDER BY id; + +DROP DICTIONARY hashed_array_dictionary_simple_key_simple_attributes; + +DROP TABLE simple_key_simple_attributes_source_table; + +DROP TABLE IF EXISTS 
simple_key_complex_attributes_source_table; +CREATE TABLE simple_key_complex_attributes_source_table +( + id UInt64, + value_first String, + value_second Nullable(String) +) +ENGINE = TinyLog; + +INSERT INTO simple_key_complex_attributes_source_table VALUES(0, 'value_0', 'value_second_0'); +INSERT INTO simple_key_complex_attributes_source_table VALUES(1, 'value_1', NULL); +INSERT INTO simple_key_complex_attributes_source_table VALUES(2, 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_complex_attributes; +CREATE DICTIONARY hashed_array_dictionary_simple_key_complex_attributes +( + id UInt64, + value_first String DEFAULT 'value_first_default', + value_second Nullable(String) DEFAULT 'value_second_default' +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'simple_key_complex_attributes_source_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_complex_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number) as value_first, + dictGet('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_first', number, toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_simple_key_complex_attributes', 'value_second', number, toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_simple_key_complex_attributes', number) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_simple_key_complex_attributes ORDER BY id; + +DROP DICTIONARY hashed_array_dictionary_simple_key_complex_attributes; +DROP TABLE simple_key_complex_attributes_source_table; + +DROP TABLE IF EXISTS simple_key_hierarchy_table; +CREATE TABLE simple_key_hierarchy_table +( + id UInt64, + parent_id UInt64 +) ENGINE = TinyLog(); + +INSERT INTO simple_key_hierarchy_table VALUES (1, 0); +INSERT INTO simple_key_hierarchy_table VALUES (2, 1); +INSERT INTO simple_key_hierarchy_table VALUES (3, 1); +INSERT INTO simple_key_hierarchy_table VALUES (4, 2); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_simple_key_hierarchy; +CREATE DICTIONARY hashed_array_dictionary_simple_key_hierarchy +( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'simple_key_hierarchy_table')) +LAYOUT(HASHED_ARRAY()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Dictionary hashed_array_dictionary_simple_key_hierarchy'; +SELECT 'dictGet'; +SELECT dictGet('hashed_array_dictionary_simple_key_hierarchy', 'parent_id', number) FROM 
system.numbers LIMIT 5; +SELECT 'dictGetHierarchy'; +SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(1)); +SELECT dictGetHierarchy('hashed_array_dictionary_simple_key_hierarchy', toUInt64(4)); + +DROP DICTIONARY hashed_array_dictionary_simple_key_hierarchy; +DROP TABLE simple_key_hierarchy_table; diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference new file mode 100644 index 00000000000..45a4fb75db8 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.reference @@ -0,0 +1 @@ +8 diff --git a/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql new file mode 100644 index 00000000000..c5bd2b5b5f2 --- /dev/null +++ b/tests/queries/0_stateless/02098_sql_user_defined_functions_aliases.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION 02098_alias_function AS x -> (((x * 2) AS x_doubled) + x_doubled); +SELECT 02098_alias_function(2); +DROP FUNCTION 02098_alias_function; diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference new file mode 100644 index 00000000000..ec32fa72b4e --- /dev/null +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.reference @@ -0,0 +1,56 @@ +Dictionary hashed_array_dictionary_complex_key_simple_attributes +dictGet existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 value_second_1 +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 value_second_1 +2 id_key_2 value_2 value_second_2 +Dictionary hashed_array_dictionary_complex_key_complex_attributes +dictGet existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGet with non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +value_first_default value_second_default +dictGetOrDefault existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +dictGetOrDefault non existing value +value_0 value_second_0 +value_1 \N +value_2 value_second_2 +default default +dictHas +1 +1 +1 +0 +select all values as input stream +0 id_key_0 value_0 value_second_0 +1 id_key_1 value_1 \N +2 id_key_2 value_2 value_second_2 diff --git a/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql new file mode 100644 index 00000000000..4d2a825c8af --- /dev/null +++ b/tests/queries/0_stateless/02099_hashed_array_dictionary_complex_key.sql @@ -0,0 +1,97 @@ +DROP TABLE IF EXISTS complex_key_simple_attributes_source_table; +CREATE TABLE complex_key_simple_attributes_source_table +( + id UInt64, + id_key String, + value_first String, + value_second String +) +ENGINE = TinyLog; + +INSERT INTO complex_key_simple_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0'); +INSERT INTO complex_key_simple_attributes_source_table 
VALUES(1, 'id_key_1', 'value_1', 'value_second_1'); +INSERT INTO complex_key_simple_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_simple_attributes; +CREATE DICTIONARY hashed_array_dictionary_complex_key_simple_attributes +( + id UInt64, + id_key String, + value_first String DEFAULT 'value_first_default', + value_second String DEFAULT 'value_second_default' +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE 'complex_key_simple_attributes_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT 'Dictionary hashed_array_dictionary_complex_key_simple_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_simple_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_complex_key_simple_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_complex_key_simple_attributes ORDER BY (id, id_key); + +DROP DICTIONARY hashed_array_dictionary_complex_key_simple_attributes; + +DROP TABLE complex_key_simple_attributes_source_table; + +DROP TABLE IF EXISTS complex_key_complex_attributes_source_table; +CREATE TABLE complex_key_complex_attributes_source_table +( + id UInt64, + id_key String, + value_first String, + value_second Nullable(String) +) +ENGINE = TinyLog; + +INSERT INTO complex_key_complex_attributes_source_table VALUES(0, 'id_key_0', 'value_0', 'value_second_0'); +INSERT INTO complex_key_complex_attributes_source_table VALUES(1, 'id_key_1', 'value_1', NULL); +INSERT INTO complex_key_complex_attributes_source_table VALUES(2, 'id_key_2', 'value_2', 'value_second_2'); + +DROP DICTIONARY IF EXISTS hashed_array_dictionary_complex_key_complex_attributes; +CREATE DICTIONARY hashed_array_dictionary_complex_key_complex_attributes +( + id UInt64, + id_key String, + + value_first String DEFAULT 'value_first_default', + value_second Nullable(String) 
DEFAULT 'value_second_default' +) +PRIMARY KEY id, id_key +SOURCE(CLICKHOUSE(TABLE 'complex_key_complex_attributes_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(COMPLEX_KEY_HASHED_ARRAY()); + +SELECT 'Dictionary hashed_array_dictionary_complex_key_complex_attributes'; +SELECT 'dictGet existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGet with non existing value'; +SELECT dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number)))) as value_first, + dictGet('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number)))) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictGetOrDefault existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 3; +SELECT 'dictGetOrDefault non existing value'; +SELECT dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_first', (number, concat('id_key_', toString(number))), toString('default')) as value_first, + dictGetOrDefault('hashed_array_dictionary_complex_key_complex_attributes', 'value_second', (number, concat('id_key_', toString(number))), toString('default')) as value_second FROM system.numbers LIMIT 4; +SELECT 'dictHas'; +SELECT dictHas('hashed_array_dictionary_complex_key_complex_attributes', (number, concat('id_key_', toString(number)))) FROM system.numbers LIMIT 4; +SELECT 'select all values as input stream'; +SELECT * FROM hashed_array_dictionary_complex_key_complex_attributes ORDER BY (id, id_key); + +DROP DICTIONARY hashed_array_dictionary_complex_key_complex_attributes; +DROP TABLE complex_key_complex_attributes_source_table; diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference new file mode 100644 index 00000000000..8f6cd5ccd03 --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.reference @@ -0,0 +1 @@ +[2,4,6] diff --git a/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql new file mode 100644 index 00000000000..1c926faf3a1 --- /dev/null +++ b/tests/queries/0_stateless/02099_sql_user_defined_functions_lambda.sql @@ -0,0 +1,4 @@ +-- Tags: no-parallel +CREATE FUNCTION 02099_lambda_function AS x -> arrayMap(array_element -> array_element * 2, x); +SELECT 02099_lambda_function([1,2,3]); +DROP FUNCTION 02099_lambda_function; diff --git a/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git 
a/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql new file mode 100644 index 00000000000..32b757f54a3 --- /dev/null +++ b/tests/queries/0_stateless/02100_alter_scalar_circular_deadlock.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS foo; + +CREATE TABLE foo (ts DateTime, x UInt64) +ENGINE = MergeTree PARTITION BY toYYYYMMDD(ts) +ORDER BY (ts); + +INSERT INTO foo (ts, x) SELECT toDateTime('2020-01-01 00:05:00'), number from system.numbers_mt LIMIT 10; + +SET mutations_sync = 1; + +ALTER TABLE foo UPDATE x = 1 WHERE x = (SELECT x from foo WHERE x = 4); + +SELECT sum(x) == 42 FROM foo; + +ALTER TABLE foo UPDATE x = 1 WHERE x IN (SELECT x FROM foo WHERE x != 0); + +SELECT sum(x) == 9 FROM foo; + +DROP TABLE IF EXISTS bar; + +CREATE TABLE bar (ts DateTime, x UInt64) +ENGINE = Memory; + +INSERT INTO bar (ts, x) SELECT toDateTime('2020-01-01 00:05:00'), number from system.numbers_mt LIMIT 10; + +SET mutations_sync = 1; + +ALTER TABLE bar UPDATE x = 1 WHERE x = (SELECT x from bar WHERE x = 4); + +SELECT sum(x) == 42 FROM bar; + +ALTER TABLE bar UPDATE x = 1 WHERE x IN (SELECT x FROM bar WHERE x != 0); + +SELECT sum(x) == 9 FROM bar; diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference new file mode 100644 index 00000000000..993dd9b1cde --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -0,0 +1,11 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql new file mode 100644 index 00000000000..32f7f63f6d0 --- /dev/null +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -0,0 +1,14 @@ +SELECT 'aaaabb ' == trim(leading 'b ' FROM 'b aaaabb ') x; +SELECT 'b aaaa' == trim(trailing 'b ' FROM 'b aaaabb ') x; +SELECT 'aaaa' == trim(both 'b ' FROM 'b aaaabb ') x; + +SELECT '1' == replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; + +SELECT '1,,' == replaceRegexpOne(',,1,,', '^[,]*|[,]*$', '') x; +SELECT '1' == replaceRegexpOne(',,1', '^[,]*|[,]*$', '') x; +SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; + +SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; +SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference new file mode 100644 index 00000000000..437cc81afba --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.reference @@ -0,0 +1,4 @@ +CREATE FUNCTION `02101_test_function` AS x -> (x + 1) +2 +CREATE FUNCTION `02101_test_function` AS x -> (x + 2) +3 diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql new file mode 100644 index 00000000000..7b0ad311bd4 --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_create_or_replace.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel + +CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 1; + +SELECT create_query FROM 
system.functions WHERE name = '02101_test_function'; +SELECT 02101_test_function(1); + +CREATE OR REPLACE FUNCTION 02101_test_function AS x -> x + 2; + +SELECT create_query FROM system.functions WHERE name = '02101_test_function'; +SELECT 02101_test_function(1); + +DROP FUNCTION 02101_test_function; diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql new file mode 100644 index 00000000000..09e2677774c --- /dev/null +++ b/tests/queries/0_stateless/02101_sql_user_defined_functions_drop_if_exists.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +CREATE FUNCTION 02101_test_function AS x -> x + 1; + +SELECT 02101_test_function(1); + +DROP FUNCTION 02101_test_function; +DROP FUNCTION 02101_test_function; --{serverError 46} +DROP FUNCTION IF EXISTS 02101_test_function; diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql new file mode 100644 index 00000000000..092fa660cb0 --- /dev/null +++ b/tests/queries/0_stateless/02102_sql_user_defined_functions_create_if_not_exists.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel + +CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1; +SELECT 02102_test_function(1); + +CREATE FUNCTION 02102_test_function AS x -> x + 1; --{serverError 609} +CREATE FUNCTION IF NOT EXISTS 02102_test_function AS x -> x + 1; +DROP FUNCTION 02102_test_function; diff --git a/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference new file mode 100644 index 00000000000..51993f072d5 --- /dev/null +++ b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.reference @@ -0,0 +1,2 @@ +2 +2 diff --git a/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql new file mode 100644 index 00000000000..3d34413b9d3 --- /dev/null +++ b/tests/queries/0_stateless/02103_sql_user_defined_functions_composition.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel + +CREATE FUNCTION 02103_test_function AS x -> x + 1; +CREATE FUNCTION 02103_test_function_with_nested_function_empty_args AS () -> 02103_test_function(1); +CREATE FUNCTION 02103_test_function_with_nested_function_arg AS (x) -> 02103_test_function(x); + +SELECT 02103_test_function_with_nested_function_empty_args(); +SELECT 02103_test_function_with_nested_function_arg(1); + +DROP FUNCTION 02103_test_function_with_nested_function_empty_args; +DROP FUNCTION 02103_test_function_with_nested_function_arg; +DROP FUNCTION 02103_test_function; diff --git 
a/tests/queries/0_stateless/02104_clickhouse_local_columns_description.reference b/tests/queries/0_stateless/02104_clickhouse_local_columns_description.reference new file mode 100644 index 00000000000..0e291f3ac0d --- /dev/null +++ b/tests/queries/0_stateless/02104_clickhouse_local_columns_description.reference @@ -0,0 +1 @@ +1 42 diff --git a/tests/queries/0_stateless/02104_clickhouse_local_columns_description.sh b/tests/queries/0_stateless/02104_clickhouse_local_columns_description.sh new file mode 100755 index 00000000000..f88a8de80f5 --- /dev/null +++ b/tests/queries/0_stateless/02104_clickhouse_local_columns_description.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "create table t (n int, m int default 42) engine=Memory;insert into t values (1, NULL);select * from t" diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp new file mode 100644 index 00000000000..f033b177a45 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_enum.capnp @@ -0,0 +1,13 @@ +@0x9ef128e10a8010b2; + +struct Message +{ + value @0 : EnumType; + + enum EnumType + { + one @0; + two @1; + tHrEe @2; + } +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp new file mode 100644 index 00000000000..a027692e4bc --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_fake_nullable.capnp @@ -0,0 +1,23 @@ +@0xd8dd7b35452d1c4c; + +struct FakeNullable1 +{ + union + { + value @0 : Text; + null @1 : Void; + trash @2 : Text; + } +} + +struct FakeNullable2 +{ + value @0 : Text; + null @1 : Void; +} + +struct Message +{ + nullable1 @0 : FakeNullable1; + nullable2 @1 : FakeNullable2; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp new file mode 100644 index 00000000000..78fe3cf551e --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_lists.capnp @@ -0,0 +1,8 @@ +@0x9ef128e10a8010b7; + +struct Message +{ + value @0 : UInt64; + list1 @1 : List(UInt64); + list2 @2 : List(List(List(UInt64))); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp new file mode 100644 index 00000000000..0958889f0d8 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_low_cardinality.capnp @@ -0,0 +1,17 @@ +@0x9ef128e10a8010b7; + +struct NullableText +{ + union + { + value @0 : Text; + null @1 : Void; + } +} + +struct Message +{ + lc1 @0 : Text; + lc2 @1 : NullableText; + lc3 @2 : List(NullableText); +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp new file mode 100644 index 00000000000..11fa99f62f5 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_lists_and_tuples.capnp @@ -0,0 +1,36 @@ +@0x9ef128e10a8010b2; + +struct Nested1 +{ + b @0 : UInt64; + c @1 : List(List(UInt64)); +} + +struct Nested2 +{ + e @0 : List(List(Nested3)); + h @1 : List(Nested4); +} + +struct Nested3 +{ + f @0 : UInt64; + g @1 : UInt64; +} + +struct Nested4 +{ + k @0 : List(UInt64); +} + 
+struct Nested +{ + a @0 : Nested1; + d @1 : List(Nested2); +} + +struct Message +{ + value @0 : UInt64; + nested @1 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp new file mode 100644 index 00000000000..42f17246d58 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_table.capnp @@ -0,0 +1,20 @@ +@0x9ef128e10a8010b3; + + +struct Nested1 +{ + one @0 : UInt64; + two @1 : UInt64; +} + +struct Nested +{ + value @0 : List(UInt64); + array @1 : List(List(UInt64)); + tuple @2 : List(Nested1); +} + +struct Message +{ + nested @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp new file mode 100644 index 00000000000..161c1bbaea6 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nested_tuples.capnp @@ -0,0 +1,23 @@ +@0x9ef128e12a8010b2; + +struct Nested1 +{ + d @0 : UInt64; + e @1 : Nested2; +} + +struct Nested2 +{ + f @0 : UInt64; +} + +struct Nested +{ + b @0 : UInt64; + c @1 : Nested1; +} + +struct Message +{ + a @0 : Nested; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp new file mode 100644 index 00000000000..41254911710 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_nullable.capnp @@ -0,0 +1,22 @@ +@0x9ef128e10a8010b2; + +struct NullableUInt64 +{ + union + { + value @0 : UInt64; + null @1 : Void; + } +} + +struct Tuple +{ + nullable @0 : NullableUInt64; +} + +struct Message +{ + nullable @0 : NullableUInt64; + array @1 : List(NullableUInt64); + tuple @2 : Tuple; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp new file mode 100644 index 00000000000..a85bbbc511b --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_simple_types.capnp @@ -0,0 +1,21 @@ +@0xd9dd7b35452d1c4f; + +struct Message +{ + int8 @0 : Int8; + uint8 @1 : UInt8; + int16 @2 : Int16; + uint16 @3 : UInt16; + int32 @4 : Int32; + uint32 @5 : UInt32; + int64 @6 : Int64; + uint64 @7 : UInt64; + float32 @8 : Float32; + float64 @9 : Float64; + string @10 : Text; + fixed @11 : Text; + data @12 : Data; + date @13 : UInt16; + datetime @14 : UInt32; + datetime64 @15 : Int64; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp new file mode 100644 index 00000000000..21c3f0eb2e1 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_tuples.capnp @@ -0,0 +1,35 @@ +@0x9ef128e10a8010b8; + +struct Nested5 +{ + x @0 : UInt64; +} + +struct Nested4 +{ + nested2 @0 : Nested5; +} + +struct Nested3 +{ + nested1 @0 : Nested4; +} + +struct Nested2 +{ + three @0 : UInt64; + four @1 : UInt64; +} + +struct Nested1 +{ + one @0 : UInt64; + two @1 : Nested2; +} + +struct Message +{ + value @0 : UInt64; + tuple1 @1 : Nested1; + tuple2 @2 : Nested3; +} diff --git a/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp new file mode 100644 index 00000000000..9fb5e37bfea --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/02030_capnp_unnamed_union.capnp @@ -0,0 +1,10 @@ +@0xd8dd7b35452d1c4f; + 
+struct Message +{ + union + { + a @0 : UInt64; + b @1 : Text; + } +} diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.reference b/tests/queries/1_stateful/00167_read_bytes_from_fs.reference new file mode 100644 index 00000000000..05b54da2ac7 --- /dev/null +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.reference @@ -0,0 +1,2 @@ +468426149779992039 +1 diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql new file mode 100644 index 00000000000..ac20e60b177 --- /dev/null +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -0,0 +1,7 @@ +SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; + +-- We had a bug which led to additional compressed data being read. test.hits compressed size is about 1.2Gb, but we read more than 3Gb. +-- Small additional reads are still possible, so we compare with about 1.5Gb. +SYSTEM FLUSH LOGS; + +SELECT ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] < 1500000000 from system.query_log where query = 'SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;' and current_database = currentDatabase() and type = 'QueryFinish'; diff --git a/utils/antlr/ClickHouseLexer.g4 b/utils/antlr/ClickHouseLexer.g4 index 8a1debaf412..0c087bff68c 100644 --- a/utils/antlr/ClickHouseLexer.g4 +++ b/utils/antlr/ClickHouseLexer.g4 @@ -35,6 +35,7 @@ CONSTRAINT: C O N S T R A I N T; CREATE: C R E A T E; CROSS: C R O S S; CUBE: C U B E; +CURRENT: C U R R E N T; DATABASE: D A T A B A S E; DATABASES: D A T A B A S E S; DATE: D A T E; @@ -65,6 +66,7 @@ FETCHES: F E T C H E S; FINAL: F I N A L; FIRST: F I R S T; FLUSH: F L U S H; +FOLLOWING: F O L L O W I N G; FOR: F O R; FORMAT: F O R M A T; FREEZE: F R E E Z E; @@ -125,8 +127,10 @@ OR: O R; ORDER: O R D E R; OUTER: O U T E R; OUTFILE: O U T F I L E; +OVER: O V E R; PARTITION: P A R T I T I O N; POPULATE: P O P U L A T E; +PRECEDING: P R E C E D I N G; PREWHERE: P R E W H E R E; PRIMARY: P R I M A R Y; PROJECTION: P R O J E C T I O N; @@ -140,6 +144,8 @@ REPLICA: R E P L I C A; REPLICATED: R E P L I C A T E D; RIGHT: R I G H T; ROLLUP: R O L L U P; +ROW: R O W; +ROWS: R O W S; SAMPLE: S A M P L E; SECOND: S E C O N D; SELECT: S E L E C T; @@ -171,6 +177,7 @@ TRIM: T R I M; TRUNCATE: T R U N C A T E; TTL: T T L; TYPE: T Y P E; +UNBOUNDED: U N B O U N D E D; UNION: U N I O N; UPDATE: U P D A T E; USE: U S E; @@ -183,6 +190,7 @@ WATCH: W A T C H; WEEK: W E E K; WHEN: W H E N; WHERE: W H E R E; +WINDOW: W I N D O W; WITH: W I T H; YEAR: Y E A R | Y Y Y Y; diff --git a/utils/antlr/ClickHouseParser.g4 b/utils/antlr/ClickHouseParser.g4 index eb1908ed073..24db6478aa0 100644 --- a/utils/antlr/ClickHouseParser.g4 +++ b/utils/antlr/ClickHouseParser.g4 @@ -243,6 +243,7 @@ selectStmt: SELECT DISTINCT? topClause? columnExprList fromClause? arrayJoinClause? + windowClause? prewhereClause? whereClause? groupByClause? (WITH (CUBE | ROLLUP))? (WITH TOTALS)? @@ -257,6 +258,7 @@ withClause: WITH columnExprList; topClause: TOP DECIMAL_LITERAL (WITH TIES)?; fromClause: FROM joinExpr; arrayJoinClause: (LEFT | INNER)? 
ARRAY JOIN columnExprList; +windowClause: WINDOW identifier AS LPAREN windowExpr RPAREN; prewhereClause: PREWHERE columnExpr; whereClause: WHERE columnExpr; groupByClause: GROUP BY ((CUBE | ROLLUP) LPAREN columnExprList RPAREN | columnExprList); @@ -298,6 +300,18 @@ ratioExpr: numberLiteral (SLASH numberLiteral)?; settingExprList: settingExpr (COMMA settingExpr)*; settingExpr: identifier EQ_SINGLE literal; +windowExpr: winPartitionByClause? winOrderByClause? winFrameClause?; +winPartitionByClause: PARTITION BY columnExprList; +winOrderByClause: ORDER BY orderExprList; +winFrameClause: (ROWS | RANGE) winFrameExtend; +winFrameExtend + : winFrameBound # frameStart + | BETWEEN winFrameBound AND winFrameBound # frameBetween + ; +winFrameBound: (CURRENT ROW | UNBOUNDED PRECEDING | UNBOUNDED FOLLOWING | numberLiteral PRECEDING | numberLiteral FOLLOWING); +//rangeClause: RANGE LPAREN (MIN identifier MAX identifier | MAX identifier MIN identifier) RPAREN; + + // SET statement setStmt: SET settingExprList; @@ -364,6 +378,8 @@ columnExpr | SUBSTRING LPAREN columnExpr FROM columnExpr (FOR columnExpr)? RPAREN # ColumnExprSubstring | TIMESTAMP STRING_LITERAL # ColumnExprTimestamp | TRIM LPAREN (BOTH | LEADING | TRAILING) STRING_LITERAL FROM columnExpr RPAREN # ColumnExprTrim + | identifier (LPAREN columnExprList? RPAREN) OVER LPAREN windowExpr RPAREN # ColumnExprWinFunction + | identifier (LPAREN columnExprList? RPAREN) OVER identifier # ColumnExprWinFunctionTarget | identifier (LPAREN columnExprList? RPAREN)? LPAREN DISTINCT? columnArgList? RPAREN # ColumnExprFunction | literal # ColumnExprLiteral @@ -454,17 +470,17 @@ interval: SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR; keyword // except NULL_SQL, INF, NAN_SQL : AFTER | ALIAS | ALL | ALTER | AND | ANTI | ANY | ARRAY | AS | ASCENDING | ASOF | AST | ASYNC | ATTACH | BETWEEN | BOTH | BY | CASE - | CAST | CHECK | CLEAR | CLUSTER | CODEC | COLLATE | COLUMN | COMMENT | CONSTRAINT | CREATE | CROSS | CUBE | DATABASE | DATABASES - | DATE | DEDUPLICATE | DEFAULT | DELAY | DELETE | DESCRIBE | DESC | DESCENDING | DETACH | DICTIONARIES | DICTIONARY | DISK | DISTINCT - | DISTRIBUTED | DROP | ELSE | END | ENGINE | EVENTS | EXISTS | EXPLAIN | EXPRESSION | EXTRACT | FETCHES | FINAL | FIRST | FLUSH | FOR - | FORMAT | FREEZE | FROM | FULL | FUNCTION | GLOBAL | GRANULARITY | GROUP | HAVING | HIERARCHICAL | ID | IF | ILIKE | IN | INDEX - | INJECTIVE | INNER | INSERT | INTERVAL | INTO | IS | IS_OBJECT_ID | JOIN | JSON_FALSE | JSON_TRUE | KEY | KILL | LAST | LAYOUT - | LEADING | LEFT | LIFETIME | LIKE | LIMIT | LIVE | LOCAL | LOGS | MATERIALIZE | MATERIALIZED | MAX | MERGES | MIN | MODIFY | MOVE - | MUTATION | NO | NOT | NULLS | OFFSET | ON | OPTIMIZE | OR | ORDER | OUTER | OUTFILE | PARTITION | POPULATE | PREWHERE | PRIMARY - | RANGE | RELOAD | REMOVE | RENAME | REPLACE | REPLICA | REPLICATED | RIGHT | ROLLUP | SAMPLE | SELECT | SEMI | SENDS | SET | SETTINGS - | SHOW | SOURCE | START | STOP | SUBSTRING | SYNC | SYNTAX | SYSTEM | TABLE | TABLES | TEMPORARY | TEST | THEN | TIES | TIMEOUT - | TIMESTAMP | TOTALS | TRAILING | TRIM | TRUNCATE | TO | TOP | TTL | TYPE | UNION | UPDATE | USE | USING | UUID | VALUES | VIEW - | VOLUME | WATCH | WHEN | WHERE | WITH + | CAST | CHECK | CLEAR | CLUSTER | CODEC | COLLATE | COLUMN | COMMENT | CONSTRAINT | CREATE | CROSS | CUBE | CURRENT | DATABASE + | DATABASES | DATE | DEDUPLICATE | DEFAULT | DELAY | DELETE | DESCRIBE | DESC | DESCENDING | DETACH | DICTIONARIES | DICTIONARY | DISK + | DISTINCT | DISTRIBUTED | DROP | 
ELSE | END | ENGINE | EVENTS | EXISTS | EXPLAIN | EXPRESSION | EXTRACT | FETCHES | FINAL | FIRST + | FLUSH | FOLLOWING | FOR | FORMAT | FREEZE | FROM | FULL | FUNCTION | GLOBAL | GRANULARITY | GROUP | HAVING | HIERARCHICAL | ID + | IF | ILIKE | IN | INDEX | INJECTIVE | INNER | INSERT | INTERVAL | INTO | IS | IS_OBJECT_ID | JOIN | JSON_FALSE | JSON_TRUE | KEY + | KILL | LAST | LAYOUT | LEADING | LEFT | LIFETIME | LIKE | LIMIT | LIVE | LOCAL | LOGS | MATERIALIZE | MATERIALIZED | MAX | MERGES + | MIN | MODIFY | MOVE | MUTATION | NO | NOT | NULLS | OFFSET | ON | OPTIMIZE | OR | ORDER | OUTER | OUTFILE | OVER | PARTITION + | POPULATE | PRECEDING | PREWHERE | PRIMARY | RANGE | RELOAD | REMOVE | RENAME | REPLACE | REPLICA | REPLICATED | RIGHT | ROLLUP | ROW + | ROWS | SAMPLE | SELECT | SEMI | SENDS | SET | SETTINGS | SHOW | SOURCE | START | STOP | SUBSTRING | SYNC | SYNTAX | SYSTEM | TABLE + | TABLES | TEMPORARY | TEST | THEN | TIES | TIMEOUT | TIMESTAMP | TOTALS | TRAILING | TRIM | TRUNCATE | TO | TOP | TTL | TYPE + | UNBOUNDED | UNION | UPDATE | USE | USING | UUID | VALUES | VIEW | VOLUME | WATCH | WHEN | WHERE | WINDOW | WITH ; keywordForAlias : DATE | FIRST | ID | KEY diff --git a/utils/build/build_msvc2017.bat b/utils/build/build_msvc2017.bat deleted file mode 100644 index 880802999ab..00000000000 --- a/utils/build/build_msvc2017.bat +++ /dev/null @@ -1,14 +0,0 @@ - -:: WINDOWS BUILD NOT SUPPORTED! -:: Script only for development - -cd ../.. -git clone --recursive https://github.com/madler/zlib contrib/zlib -md build -cd build - -:: Stage 1: try build client -cmake .. -G "Visual Studio 15 2017 Win64" -DENABLE_CLICKHOUSE_ALL=0 -DENABLE_CLICKHOUSE_CLIENT=1 > cmake.log -cmake --build . --target clickhouse -- /m > build.log -:: Stage 2: try build minimal server -:: Stage 3: enable all possible features (ssl, ...)
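The lexer and parser additions above (WINDOW, OVER, PRECEDING, FOLLOWING, UNBOUNDED, ROW/ROWS plus the windowClause/windowExpr rules) cover ClickHouse's window-function syntax. A minimal, hedged SQL sketch of the constructs those rules describe is shown below; the table `t` and columns `grp`/`val` are hypothetical names, and older servers may need window functions explicitly enabled.

```sql
-- Inline window: running sum per group, framed from the start of the
-- partition up to the current row.
SELECT
    grp,
    val,
    sum(val) OVER (PARTITION BY grp ORDER BY val
                   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_sum
FROM t;

-- Named window (WINDOW ... AS), referenced by identifier after OVER,
-- with a bounded frame of one row on each side.
SELECT grp, val, count() OVER w AS neighbours
FROM t
WINDOW w AS (PARTITION BY grp ORDER BY val ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING);
```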
diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6916139f952..7137d04a568 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,7 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --persistent=no --score=n $ROOT_PATH/tests/clickhouse-test find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name '*.yaml' -or -name '*.yml' \) -type f | grep -vP $EXCLUDE_DIRS | @@ -162,7 +162,7 @@ find $ROOT_PATH -name '.gitmodules' | while read i; do grep -F 'url = ' $i | gre find $ROOT_PATH/{src,base,programs} -name '*.h' -or -name '*.cpp' 2>/dev/null | xargs grep -i -F 'General Public License' && echo "There shouldn't be any code snippets under GPL or LGPL" # There shouldn't be any docker containers outside docker directory -find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" +find $ROOT_PATH -not -path $ROOT_PATH'/tests/ci*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name Dockerfile -type f 2>/dev/null | xargs --no-run-if-empty -n1 echo "Please move Dockerfile to docker directory:" # There shouldn't be any docker compose files outside docker directory #find $ROOT_PATH -not -path $ROOT_PATH'/tests/testflows*' -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' -name '*compose*.yml' -type f 2>/dev/null | xargs --no-run-if-empty grep -l "version:" | xargs --no-run-if-empty -n1 echo "Please move docker compose to docker directory:" diff --git a/utils/wal-dump/main.cpp b/utils/wal-dump/main.cpp index 0e47c39fb5a..3566936324b 100644 --- a/utils/wal-dump/main.cpp +++ b/utils/wal-dump/main.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/website/benchmark/hardware/results/macbook_pro_m1_2020.json b/website/benchmark/hardware/results/macbook_pro_m1_2020.json new file mode 100644 index 00000000000..52e62b06d61 --- /dev/null +++ b/website/benchmark/hardware/results/macbook_pro_m1_2020.json @@ -0,0 +1,54 @@ +[ + { + "system": "MacBook Pro M1", + "system_full": "MacBook Pro M1 13\" 2020, 8‑core CPU, 16 GiB RAM, 512 GB SSD", + "time": "2021-10-22 18:17:00", + "kind": "laptop", + "result": + [ +[0.001, 0.001, 0.001], +[0.020, 0.013, 0.012], +[0.046, 0.032, 0.032], +[0.083, 0.045, 0.044], +[0.099, 0.098, 0.099], +[0.361, 0.306, 0.297], +[0.031, 0.028, 0.026], +[0.017, 0.015, 0.015], +[0.530, 0.500, 0.497], +[0.621, 0.633, 0.634], +[0.229, 0.207, 0.207], +[0.259, 0.243, 0.242], +[0.818, 0.796, 0.767], +[1.090, 1.052, 1.055], +[1.053, 1.011, 1.071], +[1.223, 0.966, 0.864], +[14.533, 8.276, 8.041], +[6.470, 8.012, 6.991], +[38.097, 6.530, 8.532], +[0.058, 0.054, 0.052], +[1.341, 1.306, 1.141], +[1.313, 1.330, 1.311], +[3.156, 2.974, 2.919], +[1.665, 1.423, 1.401], +[0.421, 0.337, 0.338], +[0.289, 0.287, 0.300], +[0.348, 0.344, 0.341], +[1.149, 1.142, 1.150], +[1.855, 1.591, 1.984], +[1.691, 1.644, 1.646], +[0.921, 0.750, 0.771], +[1.485, 1.233, 1.011], +[91.560, 10.399, 8.895], +[8.034, 7.663, 7.372], +[6.836, 7.444, 7.235], +[1.263, 1.166, 1.150], +[0.125, 0.118, 0.118], +[0.055, 0.053, 0.053], +[0.043, 0.043, 0.043], +[0.248, 0.243, 0.251], +[0.016, 0.012, 0.012], +[0.011, 0.010, 0.010], +[0.003, 0.002, 0.002] + ] + } +] 
diff --git a/website/blog/en/2021/clickhouse-v21.10-released.md b/website/blog/en/2021/clickhouse-v21.10-released.md new file mode 100644 index 00000000000..ed0aab88017 --- /dev/null +++ b/website/blog/en/2021/clickhouse-v21.10-released.md @@ -0,0 +1,29 @@ +--- +title: 'ClickHouse v21.10 Released' +image: 'https://blog-images.clickhouse.com/en/2021/clickhouse-v21-10/featured.jpg' +date: '2021-10-14' +author: '[Rich Raposa](https://github.com/rfraposa), [Alexey Milovidov](https://github.com/alexey-milovidov)' +tags: ['company', 'community'] +--- + +We're excited to share with you our first release since [announcing ClickHouse, Inc](https://clickhouse.com/blog/en/2021/clickhouse-inc/). The 21.10 release includes contributions from many people, including many in our community, and we are grateful for your ongoing ideas, development, and support. Our Engineering team continues to be laser-focused on providing our community and users with the fastest and most scalable OLAP DBMS available while implementing many new features. In the 21.10 release, we had a wonderful 79 contributors with 1255 commits across 211 pull requests - what an amazing community, and we cherish your contributions. + +Let's highlight some of the exciting new capabilities in 21.10 (a short combined SQL sketch follows below): + +* User-defined functions (UDFs) can now be [created as lambda expressions](https://clickhouse.com/docs/en/sql-reference/functions/#higher-order-functions). For example, `CREATE FUNCTION plus_one as (a) -> a + 1` +* Two new table engines, Executable and ExecutablePool, which allow you to stream the results of a query to a custom shell script +* Instead of logging every query (which can be a lot of logs!), you can now log a random sample of your queries. The share of queries logged is controlled by a probability between 0.0 (no queries logged) and 1.0 (all queries logged), set via the new `log_queries_probability` setting. +* Positional arguments are now available in your GROUP BY, ORDER BY and LIMIT BY clauses. For example, `SELECT foo, bar, baz FROM my_table ORDER BY 2,3` orders the results by the bar and baz columns (no need to repeat the column names!) + +We're also thrilled to announce some new free training available to you in our Learn ClickHouse portal: [https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/](https://clickhouse.com/learn/lessons/whatsnew-clickhouse-21.10/) + +We're always listening for new ideas, and we're happy to welcome new contributors to the ClickHouse project. Whether you're submitting code or improving our documentation and examples, please get involved by sending us a pull request or submitting an issue.
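As referenced above, here is a short, hedged SQL sketch combining the highlighted 21.10 features; the UDF mirrors the example from the post, while `my_table`, `foo`, `bar`, and `baz` are hypothetical names.

```sql
-- A UDF defined as a lambda expression (same body as the example in the post).
CREATE FUNCTION plus_one AS (a) -> a + 1;
SELECT plus_one(41);  -- returns 42

-- Positional arguments: sort by the 2nd and 3rd selected columns (bar, baz).
SELECT foo, bar, baz FROM my_table ORDER BY 2, 3;

-- Log roughly half of all queries to system.query_log
-- (assumption: applied here as a session-level setting).
SET log_queries_probability = 0.5;
```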
Our beginner developers contribution guide will help you get started: [https://clickhouse.com/docs/en/development/developer-instruction/](https://clickhouse.com/docs/en/development/developer-instruction/) + + +## ClickHouse Release Notes + +Release 21.10 + +Release Date: 2021-10-17 + +Release Notes: [21.10](https://github.com/ClickHouse/ClickHouse/blob/master/CHANGELOG.md) diff --git a/website/css/main.css b/website/css/main.css index 229b74cb3f7..1d0b6f5c116 100644 --- a/website/css/main.css +++ b/website/css/main.css @@ -1 +1 @@ -@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:flex;justify-content:center}.btns .btn+.btn{margin-left:24px}.btns .btn-lg+.btn-lg{margin-left:40px}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and 
(min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(10%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(60%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3,.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s}}@media screen and (min-width:980px){.case-study-card .col-lg-3{transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3,.case-study-card.is-open .col-lg-auto{opacity:1;transform:none}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{transition-delay:.4s}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form 
select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and 
(max-width:399.98px){.navbar{height:80px}}.page{overflow:hidden;width:100vw}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;overflow:hidden;position:relative;width:100%}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs 
.nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform .2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file +@media screen and (max-width:978.98px){.btn{padding:8px 16px}}@media screen and (max-width:978.98px){.btn-lg{padding:12px 24px}}.btn-primary,.btn-primary:active,.btn-primary:hover{color:#212529}.btn-outline-primary{background:#fffaf0;border-color:#fc0;color:#212529}.btn-outline-primary:active,.btn-outline-primary:hover{background:#fc0;border-color:#fc0;color:#212529}.btn-secondary{border-color:#212529;color:#fff}.btn-outline-secondary,.btn-secondary:active,.btn-secondary:hover{background:#fff;border-color:#212529;color:#212529}.btn-outline-secondary:active,.btn-outline-secondary:hover{background:#212529;border-color:#212529;color:#fff}.btn-tertiary{border-color:#257af4;color:#fff}.btn-tertiary:active,.btn-tertiary:hover{background:#257af4;border-color:#257af4;color:#fff}.btn-outline-tertiary{background:#e3f1fe;color:#257af4}.btn-outline-tertiary:active,.btn-outline-tertiary:hover{background:#257af4;color:#fff}.btns{align-items:center;display:flex;justify-content:center}.btns .btn+.btn{margin-left:24px}.btns 
.btn-lg+.btn-lg{margin-left:40px}.card{box-shadow:0 8px 20px rgba(108,117,125,.2);overflow:hidden;transition:box-shadow .2s,transform .2s;width:100%}.card,.card-body{position:relative}.card-body{z-index:10}.card.is-large .card-body{padding:40px}.card.bg-primary-light{border-color:#fc0}.card.has-dark-border{border-color:#6c757d}.card.has-pattern:after,.card.has-pattern:before{background-repeat:no-repeat;background-size:auto 100%;bottom:0;content:"";display:block;position:absolute;top:0;width:72px}.card.has-pattern:before{background-image:url(../images/backgrounds/bg-card-pattern-blue-1.png);background-position:0 0;left:0}.card.has-pattern:after{background-image:url(../images/backgrounds/bg-card-pattern-blue-2.png);background-position:100% 0;right:0}.card.has-hover:active,.card.has-hover:hover,a.card:active,a.card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2);transform:translateY(-8px)}.card.has-highlight:after,.card.has-hover:after,a.card:after{content:"";display:block;height:8px;margin-top:auto;transition:background .2s;width:100%}.card.has-highlight:after,.card.has-hover:active:after,.card.has-hover:hover:after,a.card:active:after,a.card:hover:after{background:#e3f1fe}.case-study-cards{-moz-column-gap:40px;column-gap:40px;display:grid;grid-template-columns:1fr;row-gap:40px;padding-bottom:40px;position:relative}.case-study-cards>div{align-items:stretch;display:flex}.case-study-cards:before{background:#d6dbdf;bottom:0;content:"";display:block;left:20px;position:absolute;top:40px;width:100vw}@media screen and (min-width:980px){.case-study-cards{grid-template-columns:repeat(2,minmax(0,1fr));row-gap:80px;padding-bottom:120px}.case-study-cards:before{left:-40px;top:120px}}.case-study-card{align-items:stretch;flex-direction:row;flex-shrink:0;left:0;transition:box-shadow .2s,left .4s,width .4s,z-index 0s;transition-delay:0s,.6s,.6s,0s;width:100%;z-index:2}@media screen and (max-width:979.98px){.case-study-card .row{min-height:0!important}}@media screen and (min-width:980px){.case-study-card:active,.case-study-card:hover{box-shadow:0 12px 32px rgba(108,117,125,.2)}.case-study-card:not(.is-open){cursor:pointer}.case-study-card.is-open{transform:none!important;transition-delay:0s,0s,0s,0s;width:calc(200% + 40px);z-index:10}.case-study-card.is-closing{z-index:10}.case-study-card.open-left.is-open{left:calc(-100% - 40px)}.case-study-card:before{background:no-repeat url(../images/backgrounds/bg-card-pattern-red.png);background-position:100%;background-size:contain;content:"";display:block;height:calc(100% - 80px);max-height:224px;max-width:234px;position:absolute;right:0;top:40px;transform:translateX(30%);transition:transform .4s;transition-delay:.6s;width:100%;z-index:1}}@media screen and (min-width:980px)and (min-width:1240px){.case-study-card:before{transform:translateX(10%)}}@media screen and (min-width:980px){.case-study-card.is-open:before{transform:translateX(60%);transition-delay:0s}}@media screen and (min-width:980px){.case-study-card-wrap{align-items:stretch;display:flex;flex-shrink:0;min-height:304px;position:relative;transition:width .4s;transition-delay:.6s;width:calc(200% + 42px);z-index:2}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-wrap{transition-delay:0s;width:100%}}@media screen and (min-width:980px){.case-study-card-body{display:flex;flex-direction:column;padding-right:80px!important}.case-study-card-body>.row{align-self:stretch}}@media screen and (min-width:980px){.case-study-card-toggle{background:#fff;box-shadow:0 8px 20px 
rgba(108,117,125,.2);border-radius:100%;cursor:pointer;height:56px;position:relative;width:56px}.case-study-card-toggle:after,.case-study-card-toggle:before{background:#257af4;content:"";display:block;height:4px;left:calc(50% - 15px);position:absolute;top:calc(50% - 2px);transition:opacity .2s,transform .2s;width:30px}.case-study-card-toggle:after{transform:rotate(90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:before{opacity:0;transform:rotate(-90deg)}}@media screen and (min-width:980px){.case-study-card.is-open .case-study-card-toggle:after{transform:rotate(0)}}@media screen and (min-width:980px){.case-study-card .col-lg-3,.case-study-card .col-lg-auto{opacity:0;transform:translateX(24px);transition:opacity .4s,transform .4s}}@media screen and (min-width:980px){.case-study-card .col-lg-3{transition-delay:0s}}@media screen and (min-width:980px){.case-study-card .col-lg-auto{transition-delay:.2s}}@media screen and (min-width:980px)and (min-width:980px){.case-study-card .col-lg-auto{max-width:605px;width:calc(100% - 319px)}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3,.case-study-card.is-open .col-lg-auto{opacity:1;transform:none}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-3{transition-delay:.4s}}@media screen and (min-width:980px){.case-study-card.is-open .col-lg-auto{transition-delay:.2s}}.footer-copy{white-space:nowrap}form .form-control{border:1px solid #6c757d;border-radius:6px;height:auto;line-height:20px;min-height:44px;padding:12px 16px;width:100%}form .form-control,form .form-control:focus{box-shadow:0 8px 20px rgba(108,117,125,.2);color:#212529}form .form-control:focus{border-color:#212529}form .form-control::-moz-placeholder{color:#6c757d}form .form-control:-ms-input-placeholder{color:#6c757d}form .form-control::placeholder{color:#6c757d}form select.form-control{-webkit-appearance:none;-moz-appearance:none;appearance:none}form select.form-control:not([data-chosen]){color:#6c757d}form .btn-secondary:active,form .btn-secondary:hover{color:#212529;background:#fc0;border-color:#fc0}.hero{overflow:visible;position:relative}.hero,.hero-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.hero-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.hero>.container{position:relative;z-index:2}.hero.has-offset{margin-bottom:-160px;padding-bottom:160px}.base-hero{height:22.5vw;max-height:324px;min-height:280px}.index-hero{background-image:url(/images/backgrounds/bg-hero-home.svg);height:68vw;max-height:980px}.index-hero,.other-hero{max-width:2448px;width:170vw}.other-hero{background-image:url(/images/backgrounds/bg-hero.svg)}.bg-footer-cta{background-image:url(/images/backgrounds/bg-footer-cta.svg);width:2448px}.quickstart-bg{background-image:url(/images/backgrounds/bg-quick-start.svg);height:40vw;top:220px;width:170vw}hr{background:#f1f6f9;border:0;display:block;height:4px;margin:0;width:100%}hr.is-small{height:2px}hr.is-large{height:8px}hr.is-medium{background:#d6dbdf}hr.is-dark{background:#495057}hr.is-yellow{background:linear-gradient(90deg,#ff8c00,#ff8c00 8px,#fc0 16px,rgba(255,204,0,0));-webkit-clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 100%);clip-path:polygon(8px 100%,0 100%,0 0,8px 0,8px 100%,16px 100%,16px 0,100% 0,100% 
100%);height:8px}.icon{display:block;height:48px;margin-bottom:24px;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center}@media screen and (min-width:576px){.icon{height:64px}}@media screen and (min-width:980px){.icon{height:80px}}img{max-width:100%}.kicker{color:#6c757d;font-family:Hind Siliguri,sans-serif;font-size:.875rem;font-weight:600;letter-spacing:1px;margin:0}@media screen and (max-width:978.98px){.lead{font-size:1.125rem}}.navbar-clickhouse{border-bottom:4px solid #f1f6f9;height:142px}.navbar-clickhouse>.container{flex-wrap:wrap}.navbar-super{flex-shrink:0;width:100%}.navbar-super ul{list-style:none}.navbar-super li:not(:last-child){margin-bottom:0;margin-right:24px}.navbar-super a{align-items:center;color:#212529;display:flex;font-size:.875rem}.navbar-super a:active,.navbar-super a:hover{color:#257af4;text-decoration:none}.navbar-super img{flex-shrink:0;margin-right:4px}.navbar-brand-clickhouse{background:no-repeat url(../images/logo-clickhouse.svg);background-size:contain;flex-shrink:0;height:28px;margin-right:48px;padding:0;width:180px}.navbar-nav{align-items:center;height:46px}.navbar .nav-item:not(:last-child){margin-bottom:0;margin-right:24px}.navbar .nav-link{color:#212529}.navbar .nav-link:active,.navbar .nav-link:hover{color:#257af4}.navbar .navbar-nav{flex-direction:row}@media screen and (max-width:978.98px){.navbar>.container{padding-left:20px;padding-right:20px}.navbar .navbar-toggler{height:24px;padding:0;width:24px}.navbar .navbar-toggler:focus{outline:none}.navbar .navbar-toggler-icon{background:no-repeat url(../images/icons/icon-menu.svg);background-position:50%;background-size:contain;height:24px;width:24px}.navbar .navbar-collapse{background:#fff;border-bottom:4px solid #f1f6f9;height:56px;left:0;padding:0 20px 16px;position:absolute;right:0;top:100%}.navbar .nav-link{font-size:.875rem;white-space:nowrap}}@media screen and (max-width:615.98px){.navbar .navbar-collapse{height:auto}.navbar .navbar-nav{flex-direction:column;height:auto}.navbar .nav-item:not(:last-child){margin-bottom:16px;margin-right:0}}@media screen and (max-width:399.98px){.navbar{height:80px}}.page,.photo-frame{overflow:hidden;width:100%}.photo-frame{background:hsla(0,0%,100%,.6);border-radius:100%;box-shadow:0 8px 20px rgba(108,117,125,.2);display:block;margin-bottom:24px;max-width:160px;position:relative}.photo-frame:before{content:"";display:block;padding-bottom:100%;width:100%}.photo-frame img{display:block;height:100%;left:0;-o-object-fit:contain;object-fit:contain;-o-object-position:center;object-position:center;position:absolute;top:0;width:100%}.pullquote{position:relative;width:70%}.pullquote:before{background:no-repeat url(../images/backgrounds/bg-quotes.svg);background-position:50%;background-size:100%;content:"";mix-blend-mode:multiply;right:56px;width:calc(100% - 16px);z-index:2}.pullquote-bg,.pullquote:before{bottom:0;display:block;position:absolute;top:0}.pullquote-bg{right:0;width:calc(50vw + 28.57143%);z-index:1}.pullquote-body{padding:64px 40px 64px 0;position:relative;z-index:3}.pullquote-quote{font-family:Hind 
Siliguri,sans-serif;font-size:32px;font-weight:700}.pullquote-citation{font-size:1.125rem}.section{overflow:visible;position:relative}.section,.section-bg{background-repeat:no-repeat;background-position:50%;background-size:cover}.section-bg{display:block;height:100%;left:50%;position:absolute;top:0;transform:translateX(-50%);z-index:1}.section>.container{position:relative;z-index:2}.social-icons{align-items:center;display:flex}.social-icons>a{aspect-ratio:24/24;background:#6c757d;display:block;height:24px;width:24px;-webkit-mask-position:center;mask-position:center;-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat;-webkit-mask-size:contain;mask-size:contain;transition:background .2s}.social-icons>a:active,.social-icons>a:hover{background:#212529}.social-icons>a+a{margin-left:32px}.social-icons-facebook{-webkit-mask-image:url(/images/icons/icon-facebook-gray.svg);mask-image:url(/images/icons/icon-facebook-gray.svg)}.social-icons-twitter{-webkit-mask-image:url(/images/icons/icon-twitter-gray.svg);mask-image:url(/images/icons/icon-twitter-gray.svg);width:31px}.social-icons-linkedin{-webkit-mask-image:url(/images/icons/icon-linkedin-gray.svg);mask-image:url(/images/icons/icon-linkedin-gray.svg)}.social-icons-linkedin-alt{-webkit-mask-image:url(/images/icons/icon-linkedin-alt-gray.svg);mask-image:url(/images/icons/icon-linkedin-alt-gray.svg)}.social-icons.size-small>a{height:20px;width:20px}.social-icons.size-small>a:active,.social-icons.size-small>a:hover{background:#212529}.social-icons.size-small>a+a{margin-left:16px}.tabs{position:relative}.tabs:before{background:#fff;border-radius:7px 7px 0 0;content:"";display:block;height:8px;left:1px;position:absolute;right:1px;top:68px;z-index:10}@media screen and (min-width:1240px){.tabs:before{top:76px}}.tabs-body{background:#fff;border-radius:8px;border:1px solid #6c757d;box-shadow:0 8px 20px rgba(108,117,125,.2);padding:24px}@media screen and (min-width:980px){.tabs-body{padding:32px}}@media screen and (min-width:1240px){.tabs-body{padding:40px}}.tabs .nav-tabs{border-bottom:0;flex-wrap:nowrap;height:76px;margin:-20px -20px -9px;-webkit-mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);mask-image:linear-gradient(90deg,transparent,#000 20px,#000 calc(100% - 20px),transparent);overflow:scroll;overflow-x:scroll;overflow-y:visible;padding:20px 20px 0;position:relative}@media screen and (min-width:940px){.tabs .nav-tabs{overflow:visible}}@media screen and (min-width:1240px){.tabs .nav-tabs{height:84px}}.tabs .nav-link{align-items:center;border-bottom:0;color:#6c757d;display:flex;font-size:.875rem;flex-shrink:0;height:56px;justify-content:center;padding:0 12px 8px;text-align:center;white-space:nowrap}@media screen and (min-width:1240px){.tabs .nav-link{height:64px;padding:0 16px 8px}}.tabs .nav-link.active{background:#fff;box-shadow:0 -4px 8px rgba(108,117,125,.1);font-weight:700;padding:0 16px 8px}@media screen and (min-width:980px){.tabs .nav-link.active{padding:0 24px 8px}}@media screen and (min-width:1240px){.tabs .nav-link.active{padding:0 32px 8px}}.tab-pane pre{background:#212529;border-radius:16px;color:#fff;padding:24px 16px}@media screen and (min-width:1240px){.tab-pane pre{padding:32px 24px}}.trailing-link{align-items:center;color:#212529;display:flex;font-size:.875rem;font-weight:700}.trailing-link:after{background:no-repeat url(../images/icons/icon-arrow.svg);background-position:100%;background-size:contain;content:"";display:block;height:12px;transition:transform 
.2s;width:20px}.trailing-link:active,.trailing-link:hover{color:#212529;text-decoration:none}.trailing-link:active:after,.trailing-link:hover:after{transform:translateX(8px)}.trailing-link.span-full:after{margin-left:auto}ul{color:#495057;list-style-type:square;padding-left:1.25em}ul li:not(:last-child){margin-bottom:16px}ul li::marker{color:#ff3939}ul.has-separators{list-style:none;padding:0}ul.has-separators li:not(:last-child){border-bottom:4px solid #f1f6f9;margin-bottom:24px;padding-bottom:24px}.bg-gradient-secondary{background-image:linear-gradient(58deg,#ff6443 3%,#fe561d 24%,#e32f0d 93%)}.bg-gradient-light-orange{background-image:linear-gradient(90deg,rgba(255,203,128,0),#ffcb80)}.bg-offset-right{bottom:0;left:-24px;position:absolute;top:0;width:calc(100vw + 24px);z-index:-1}@media screen and (min-width:1240px){.bg-offset-right{left:-96px;width:calc(100vw + 96px)}}.bg-inset-right{bottom:0;left:40px;position:absolute;top:0;width:calc(100vw - 40px);z-index:-1}@media screen and (min-width:980px){.bg-inset-right{left:96px;width:calc(100vw - 96px)}}.has-border-left{border-left:8px solid #f1f6f9;padding-left:16px}.font-xl{font-size:1.25rem}.font-lg{font-size:1.125rem}.font-sm{font-size:.875rem}.font-xs{font-size:.625rem}.font-weight-semibold{font-weight:600}.display-5{color:#212529;font-size:20px;font-weight:500}.display-6{color:#212529;font-size:14px;font-weight:700}.text-decoration-underline{text-decoration:underline}.text-upper{text-transform:uppercase} \ No newline at end of file diff --git a/website/src/scss/components/_page.scss b/website/src/scss/components/_page.scss index 926f9656b73..a411ff7a15c 100644 --- a/website/src/scss/components/_page.scss +++ b/website/src/scss/components/_page.scss @@ -1,4 +1,4 @@ .page { overflow: hidden; - width: 100vw; + width: 100%; } diff --git a/website/templates/global/nav.html b/website/templates/global/nav.html index 7e120c99126..d775c88e4a5 100644 --- a/website/templates/global/nav.html +++ b/website/templates/global/nav.html @@ -42,7 +42,7 @@ Documentation